generated ggpairs plots finally
This commit is contained in:
parent
b68841b337
commit
a3e5283a9b
11 changed files with 657 additions and 939 deletions
|
@ -4,22 +4,41 @@ library("ggforce")
|
|||
#install.packages("gginference")
|
||||
library(gginference)
|
||||
library(ggpubr)
|
||||
library(svglite)
|
||||
##################################################
|
||||
#%% read data
|
||||
# DOME: read data using gene and drug combination
|
||||
# gene must be lowercase
|
||||
# tolower(gene)
|
||||
#################################################
|
||||
############################################################
|
||||
#gene="pncA"
|
||||
#drug="pyrazinamide"
|
||||
|
||||
#lineage_filename=paste0(tolower(gene),"_merged_df2.csv")
|
||||
#lineage_data_path="~/git/Data/pyrazinamide/output"
|
||||
|
||||
df2 = read.csv(paste0(lineage_data_path,"/",lineage_filename))
|
||||
#=============
|
||||
# Data: Input
|
||||
#==============
|
||||
#source("~/git/LSHTM_analysis/config/alr.R")
|
||||
#source("~/git/LSHTM_analysis/config/embb.R")
|
||||
# source("~/git/LSHTM_analysis/config/gid.R")
|
||||
source("~/git/LSHTM_analysis/config/katg.R")
|
||||
#source("~/git/LSHTM_analysis/config/pnca.R")
|
||||
#source("~/git/LSHTM_analysis/config/rpob.R")
|
||||
|
||||
foo = as.data.frame(colnames(df2))
|
||||
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
||||
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
|
||||
|
||||
|
||||
#=======
|
||||
# output
|
||||
#=======
|
||||
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
|
||||
cat("plots will output to:", outdir_images)
|
||||
|
||||
###########################################################
|
||||
class(merged_df2)
|
||||
foo = as.data.frame(colnames(merged_df2))
|
||||
|
||||
cols_to_subset = c('mutationinformation'
|
||||
, 'snp_frequency'
|
||||
|
@ -36,7 +55,7 @@ cols_to_subset = c('mutationinformation'
|
|||
|
||||
#cols_to_subset%in%foo
|
||||
|
||||
my_df = df2[ ,cols_to_subset]
|
||||
my_df = merged_df2[ ,cols_to_subset]
|
||||
|
||||
# r24p_embb = df_embb[df_embb$mutationinformation == "R24P",]
|
||||
# #tm = c("A102P", "M1T")
|
||||
|
@ -73,10 +92,9 @@ table(my_df2$lineage)
|
|||
|
||||
sel_lineages2 = c("L1", "L2", "L3", "L4")
|
||||
my_df2 = my_df2[my_df2$lineage%in%sel_lineages2,]
|
||||
table(my_df2$lineage)
|
||||
|
||||
sum(table(my_df2$lineage)) == nrow(my_df2)
|
||||
table(my_df2$lineage)
|
||||
table(my_df2$lineage, my_df2$sensitivity)
|
||||
|
||||
# %%
|
||||
# str(my_df2)
|
||||
|
@ -85,6 +103,7 @@ table(my_df2$lineage)
|
|||
|
||||
#%% get only muts which belong to > 1 lineage and have different sensitivity classifications
|
||||
muts = unique(my_df2$mutationinformation)
|
||||
cat ("Total unique muts in L1-L4", tolower(gene), ":", length(muts))
|
||||
#-----------------------------------------------
|
||||
# step 0 : get muts with more than one lineage
|
||||
#-----------------------------------------------
|
||||
|
@ -100,7 +119,6 @@ for (i in muts) {
|
|||
}
|
||||
cat("\nGot:", length(lin_muts), "mutations belonging to >1 lineage with differing drug sensitivities")
|
||||
|
||||
|
||||
#-----------------------------------------------
|
||||
# step 1 : get other muts that do not have this
|
||||
#-----------------------------------------------
|
||||
|
@ -111,7 +129,6 @@ cat("\nGot:", length(consist_muts), "mutations that are consistent")
|
|||
# step 2: subset these muts for plotting
|
||||
#-----------------------------------------------
|
||||
plot_df = my_df2[my_df2$mutationinformation%in%lin_muts,]
|
||||
|
||||
cat("\nnrow of plot_df:", nrow(plot_df))
|
||||
|
||||
#-----------------------------------------------
|
||||
|
@ -125,7 +142,9 @@ for (i in lin_muts) {
|
|||
s_tab = table(s_mut$lineage, s_mut$sens2)
|
||||
#print(s_tab)
|
||||
#ft_pvalue_i = round(fisher.test(s_tab)$p.value, 3)
|
||||
ft_pvalue_i = fisher.test(s_tab)$p.value
|
||||
ft_pvalue_i = fisher.test(s_tab
|
||||
#, workspace=2e9
|
||||
, simulate.p.value=TRUE,B=1e7)$p.value
|
||||
#print(ft_pvalue_i)
|
||||
plot_df$pval[plot_df$mutationinformation == i] <- ft_pvalue_i
|
||||
#print(s_tab)
|
||||
|
@ -155,8 +174,6 @@ plot_df
|
|||
head(plot_df)
|
||||
table(plot_df$pvalR<0.05)
|
||||
|
||||
|
||||
|
||||
# format p value
|
||||
# TODO: add case statement for correct pvalue formatting
|
||||
#plot_df$pvalF = ifelse(plot_df$pval <= 0.0001, paste0(round(plot_df$pval, 3), "**** "), plot_df$pval )
|
||||
|
@ -233,6 +250,7 @@ cat("\nGot:", sig_muts, "mutations that are significant")
|
|||
plot_df_ns = plot_df2[plot_df2$pvalR>0.05,]
|
||||
ns_muts = length(unique(plot_df_ns$mutationinformation))
|
||||
cat("\nGot:", ns_muts, "mutations that are NOT significant")
|
||||
|
||||
p_title = gene
|
||||
ts = 8
|
||||
gls = 3
|
||||
|
@ -244,7 +262,7 @@ gls = 3
|
|||
#3) Add *: Extend yaxis for each plot to allow geom_label to have space (or see
|
||||
# if this self resolving with facet_wrap_paginate())
|
||||
#================================================
|
||||
#svg(paste0(outdir_images, "embb_linDS.svg"), width = 6, height = 10 ) # old-school square 4:3 CRT shape 1.3:1
|
||||
#svg(paste0(outdir_images, tolower(gene), "_linDS.svg"), width = 6, height = 10 ) # old-school square 4:3 CRT shape 1.3:1
|
||||
ds_s = ggplot(plot_df_sig, aes(x = lineage
|
||||
, fill = sens2)) +
|
||||
geom_bar(stat = 'count') +
|
||||
|
@ -280,7 +298,7 @@ ds_s = ggplot(plot_df_sig, aes(x = lineage
|
|||
###################################
|
||||
#ns muts
|
||||
|
||||
#svg(paste0(outdir_images, "embb_linDS_ns.svg"), width =10 , height = 8) # old-school square 4:3 CRT shape 1.3:1
|
||||
#svg(paste0(outdir_images, tolower(gene), "_linDS_ns.svg"), width =10 , height = 8) # old-school square 4:3 CRT shape 1.3:1
|
||||
ds_ns = ggplot(plot_df_ns, aes(x = lineage
|
||||
, fill = sens2)) +
|
||||
geom_bar(stat = 'count') +
|
||||
|
@ -309,31 +327,57 @@ ds_ns = ggplot(plot_df_ns, aes(x = lineage
|
|||
labs(title = paste0(p_title, ": sensitivity by lineage")
|
||||
, y = 'Sample Count')
|
||||
#dev.off()
|
||||
#####################################################################
|
||||
#===================
|
||||
# Combine output
|
||||
#====================
|
||||
|
||||
|
||||
# svg(paste0(outdir_images, "embb_linDS_CL.svg")
|
||||
# svg(paste0(outdir_images, tolower(gene), "_linDS_CL.svg")
|
||||
# , width = 11
|
||||
# , height = 8 )
|
||||
png(paste0(outdir_images, "embb_linDS_CL.png")
|
||||
, width = 11.75
|
||||
png(paste0(outdir_images, tolower(gene), "_linDS_CL2.png")
|
||||
, width = 11.75*1.15
|
||||
, height = 8, units = "in", res = 300 )
|
||||
|
||||
cowplot::plot_grid(ds_s, ds_ns
|
||||
, ncol = 2
|
||||
,rel_widths = c(1,2)
|
||||
#, align = "hv"
|
||||
, rel_widths = c(1,2.5)
|
||||
, labels = "AUTO")
|
||||
|
||||
dev.off()
|
||||
|
||||
########################################################################
|
||||
#==================
|
||||
# Summary output
|
||||
#==================
|
||||
cat ("Total unique muts in ALL samples for", tolower(gene), ":", length(unique(merged_df2$mutationinformation)))
|
||||
other_lin_muts = unique(merged_df2$mutationinformation)[!unique(merged_df2$mutationinformation)%in%unique(my_df2$mutationinformation)]
|
||||
|
||||
cat ("Total unique muts NOT in L1-L4:", length(other_lin_muts))
|
||||
cat("These are:\n", other_lin_muts)
|
||||
other_lin_muts_df = merged_df2[merged_df2$mutationinformation%in%other_lin_muts,]
|
||||
|
||||
if ( length(unique(other_lin_muts_df$mutationinformation)) == length(other_lin_muts)) {
|
||||
cat("\nPASS: other lin muts extracted")
|
||||
}else{
|
||||
stop("\nAbort: other lin muts numbers mismatch")
|
||||
}
|
||||
|
||||
table(other_lin_muts_df$mutationinformation, other_lin_muts_df$lineage)
|
||||
|
||||
cat("\n==============================================\n")
|
||||
cat ("Total samples L1-L4:", nrow(my_df2))
|
||||
table(my_df2$lineage)
|
||||
table(my_df2$lineage, my_df2$sensitivity)
|
||||
|
||||
cat ("Total unique muts in L1-L4", tolower(gene), ":", length(muts))
|
||||
cat("\nGot:", length(lin_muts), "mutations belonging to >1 lineage with differing drug sensitivities")
|
||||
|
||||
cat("\nGot:", sig_muts, "mutations that are significant"
|
||||
, "\nThese are:", unique(plot_df_sig$mutationinformation))
|
||||
|
||||
#geom_text(aes(label = paste0("p=",pvalF), x = 2.5, ypos_label+1))# +
|
||||
cat("\nGot:", ns_muts, "mutations that are NOT significant"
|
||||
, "\nThese are:", unique(plot_df_ns$mutationinformation))
|
||||
|
||||
#geom_segment(aes(x = 1, y = ypos_label+0.5, xend = 4, yend = ypos_label+0.5))
|
||||
#geom_hline(data = lin_muts_dfM, aes(yintercept=ypos_label+0.5))
|
||||
#geom_bracket(data=lin_muts_dfM, aes(xmin = 1, xmax = 4, y.position = ypos_label+0.5, label=''))
|
||||
cat("\n==============================================\n")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue