From 8965bee5d6469997ead0d1329956742a89488ef4 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Wed, 29 Jun 2022 17:18:55 +0100 Subject: [PATCH] LINEAGE2 plots: p value stars --- scripts/plotting/LINEAGE2.R | 104 ++++++++++++++++++++++++++---------- 1 file changed, 75 insertions(+), 29 deletions(-) diff --git a/scripts/plotting/LINEAGE2.R b/scripts/plotting/LINEAGE2.R index c399df1..8171757 100644 --- a/scripts/plotting/LINEAGE2.R +++ b/scripts/plotting/LINEAGE2.R @@ -4,15 +4,20 @@ library("ggforce") #install.packages("gginference") library(gginference) library(ggpubr) +library(svglite) ################################################## #%% read data -# TODO: read data using gene and drug combination +# DOME: read data using gene and drug combination # gene must be lowercase # tolower(gene) ################################################# +gene="pncA" +drug="pyrazinamide" +lineage_filename=paste0(tolower(gene),"_merged_df2.csv") +lineage_data_path="~/git/Data/pyrazinamide/output" -df = read.csv("/home/tanu/git/Data/pyrazinamide/output/pnca_merged_df2.csv") +df = read.csv(paste0(lineage_data_path,"/",lineage_filename)) #df2 = read.csv("/home/tanu/git/Data/pyrazinamide/output/pnca_merged_df3.csv") foo = as.data.frame(colnames(df)) @@ -96,33 +101,50 @@ cat("\nnrow of plot_df:", nrow(plot_df)) #----------------------------------------------- plot_df$pval = NULL for (i in lin_muts) { - print (i) + #print (i) s_mut = plot_df[plot_df$mutationinformation == i,] - print(s_mut) + #print(s_mut) s_tab = table(s_mut$lineage, s_mut$sensitivity) - print(s_tab) - ft_pvalue_i = round(fisher.test(s_tab)$p.value, 3) - - print(ft_pvalue_i) + #print(s_tab) + #ft_pvalue_i = round(fisher.test(s_tab)$p.value, 3) + ft_pvalue_i = fisher.test(s_tab)$p.value + #print(ft_pvalue_i) plot_df$pval[plot_df$mutationinformation == i] <- ft_pvalue_i #print(s_tab) } +plot_df$pvalR = round(plot_df$pval, 3) + +plot_df$pvalRF = ifelse(plot_df$pvalR == 0.05, paste0("p=",plot_df$pvalR, "."), plot_df$pvalR ) +plot_df$pvalRF = ifelse(plot_df$pvalR <= 0.05, paste0("p=",plot_df$pvalR, "*"), plot_df$pvalRF ) +plot_df$pvalRF = ifelse(plot_df$pvalR <= 0.01, paste0("p=",plot_df$pvalR, "**"), plot_df$pvalRF ) +plot_df$pvalRF = ifelse(plot_df$pvalR == 0, 'p<0.001, ***', plot_df$pvalRF) + + + head(plot_df$pval) +head(plot_df$pvalR) +View(plot_df$pvalRF) # format p value # TODO: add case statement for correct pvalue formatting -plot_df$pvalF = ifelse(plot_df$pval < 0.05, paste0(plot_df$pval, "*"), plot_df$pval ) -plot_df$pvalF +#plot_df$pvalF = ifelse(plot_df$pval <= 0.0001, paste0(round(plot_df$pval, 3), "**** "), plot_df$pval ) +# plot_df$pvalF = ifelse(plot_df$pval <= 0.001, paste0(round(plot_df$pval, 3), "*** "), plot_df$pval ) +# plot_df$pvalF = ifelse(plot_df$pval <= 0.01, paste0(round(plot_df$pval, 3), "** "), plot_df$pval ) +# plot_df$pvalF = ifelse(plot_df$pval < 0.05, paste0(round(plot_df$pval, 3), "* "), plot_df$pval ) +# plot_df$pvalF = ifelse(plot_df$pval == 0.05, paste0(round(plot_df$pval, 3), ". "), plot_df$pval ) + +round(plot_df$pvalF, 3) + #================================================ # Plot attempt 1 [no stats]: WORKS beeautifully #================================================ -ggplot(plot_df, aes(x = lineage - , fill = factor(sensitivity))) + - geom_bar(stat = 'count')+ - #coord_cartesian(ylim = c(0, ypos_label)) + - facet_wrap(~mutationinformation - , scales = 'free_y') +# ggplot(plot_df, aes(x = lineage +# , fill = factor(sensitivity))) + +# geom_bar(stat = 'count')+ +# #coord_cartesian(ylim = c(0, ypos_label)) + +# facet_wrap(~mutationinformation +# , scales = 'free_y') ######################################################### #================================================ @@ -175,23 +197,47 @@ if (nrow(lin_muts_dfM) == nrow(plot_df) ){ #================================================ # Plot: with stats (plot_df2) # TODO: -#1) Add gene name from variable as plot title. -#2) Add: facet_wrap_paginate () to allow graphs to span over multiple pages +#1) DONE: Add gene name from variable as plot title. +#2) DONE: Add: facet_wrap_paginate () to allow graphs to span over multiple pages #3) Add *: Extend yaxis for each plot to allow geom_label to have space (or see # if this self resolving with facet_wrap_paginate()) #================================================ -p_title = "" +plot_pages = round(length(lin_muts)/25) +p_title = gene +res = 144 # SVG dots-per-inch -ggplot(plot_df2, aes(x = lineage - , fill = sensitivity)) + - geom_bar(stat = 'count') + - facet_wrap(~mutationinformation - , scales = 'free_y') + - theme(legend.position = "top")+ - labs(title = p_title) + - #geom_text(aes(label = p.value, x = 0.5, y = 5)) - geom_label(aes(label = paste0("p=",pvalF), x = 2.5, ypos_label+1), fill="white")# + - #geom_text(aes(label = paste0("p=",pvalF), x = 2.5, ypos_label+1))# + +sapply(1:plot_pages, function(page){ + print(paste0("Plotting page:", page)) + svglite(paste0("/tmp/output-",page,".svg"), width=2048/res, height=1534/res) # old-school square 4:3 CRT shape 1.3:1 + print( + ggplot(plot_df2, aes(x = lineage + , fill = sensitivity)) + + geom_bar(stat = 'count') + + facet_wrap_paginate(~mutationinformation + , scales = 'free_y' + , ncol = 5 + , nrow = 5 + , page = page) + + theme(legend.position = "top" + , plot.title = element_text(hjust = 0.5, size=20) + , strip.text = element_text(size=14) + , axis.text.x = element_text(size=14) + , axis.text.y = element_text(size=14) + , axis.title.y = element_text(size=14) + , legend.title = element_blank() + , axis.title.x = element_blank() + )+ + labs(title = paste0(p_title, ": sensitivity by lineage") + , y = 'Sample Count' + ) + + #geom_text(aes(label = p.value, x = 0.5, y = 5)) + geom_blank(aes(y = ypos_label+2)) + + geom_label(aes(label = pvalRF, x = 2.5, ypos_label+0.75), fill="white") + ) + dev.off() +} +) +#geom_text(aes(label = paste0("p=",pvalF), x = 2.5, ypos_label+1))# + #geom_segment(aes(x = 1, y = ypos_label+0.5, xend = 4, yend = ypos_label+0.5)) #geom_hline(data = lin_muts_dfM, aes(yintercept=ypos_label+0.5))