LINEAGE2 plots: p value stars

This commit is contained in:
Tanushree Tunstall 2022-06-29 17:18:55 +01:00
parent 087170a798
commit 8965bee5d6

View file

@ -4,15 +4,20 @@ library("ggforce")
#install.packages("gginference")
library(gginference)
library(ggpubr)
library(svglite)
##################################################
#%% read data
# TODO: read data using gene and drug combination
# DOME: read data using gene and drug combination
# gene must be lowercase
# tolower(gene)
#################################################
gene="pncA"
drug="pyrazinamide"
lineage_filename=paste0(tolower(gene),"_merged_df2.csv")
lineage_data_path="~/git/Data/pyrazinamide/output"
df = read.csv("/home/tanu/git/Data/pyrazinamide/output/pnca_merged_df2.csv")
df = read.csv(paste0(lineage_data_path,"/",lineage_filename))
#df2 = read.csv("/home/tanu/git/Data/pyrazinamide/output/pnca_merged_df3.csv")
foo = as.data.frame(colnames(df))
@ -96,33 +101,50 @@ cat("\nnrow of plot_df:", nrow(plot_df))
#-----------------------------------------------
plot_df$pval = NULL
for (i in lin_muts) {
print (i)
#print (i)
s_mut = plot_df[plot_df$mutationinformation == i,]
print(s_mut)
#print(s_mut)
s_tab = table(s_mut$lineage, s_mut$sensitivity)
print(s_tab)
ft_pvalue_i = round(fisher.test(s_tab)$p.value, 3)
print(ft_pvalue_i)
#print(s_tab)
#ft_pvalue_i = round(fisher.test(s_tab)$p.value, 3)
ft_pvalue_i = fisher.test(s_tab)$p.value
#print(ft_pvalue_i)
plot_df$pval[plot_df$mutationinformation == i] <- ft_pvalue_i
#print(s_tab)
}
plot_df$pvalR = round(plot_df$pval, 3)
plot_df$pvalRF = ifelse(plot_df$pvalR == 0.05, paste0("p=",plot_df$pvalR, "."), plot_df$pvalR )
plot_df$pvalRF = ifelse(plot_df$pvalR <= 0.05, paste0("p=",plot_df$pvalR, "*"), plot_df$pvalRF )
plot_df$pvalRF = ifelse(plot_df$pvalR <= 0.01, paste0("p=",plot_df$pvalR, "**"), plot_df$pvalRF )
plot_df$pvalRF = ifelse(plot_df$pvalR == 0, 'p<0.001, ***', plot_df$pvalRF)
head(plot_df$pval)
head(plot_df$pvalR)
View(plot_df$pvalRF)
# format p value
# TODO: add case statement for correct pvalue formatting
plot_df$pvalF = ifelse(plot_df$pval < 0.05, paste0(plot_df$pval, "*"), plot_df$pval )
plot_df$pvalF
#plot_df$pvalF = ifelse(plot_df$pval <= 0.0001, paste0(round(plot_df$pval, 3), "**** "), plot_df$pval )
# plot_df$pvalF = ifelse(plot_df$pval <= 0.001, paste0(round(plot_df$pval, 3), "*** "), plot_df$pval )
# plot_df$pvalF = ifelse(plot_df$pval <= 0.01, paste0(round(plot_df$pval, 3), "** "), plot_df$pval )
# plot_df$pvalF = ifelse(plot_df$pval < 0.05, paste0(round(plot_df$pval, 3), "* "), plot_df$pval )
# plot_df$pvalF = ifelse(plot_df$pval == 0.05, paste0(round(plot_df$pval, 3), ". "), plot_df$pval )
round(plot_df$pvalF, 3)
#================================================
# Plot attempt 1 [no stats]: WORKS beeautifully
#================================================
ggplot(plot_df, aes(x = lineage
, fill = factor(sensitivity))) +
geom_bar(stat = 'count')+
#coord_cartesian(ylim = c(0, ypos_label)) +
facet_wrap(~mutationinformation
, scales = 'free_y')
# ggplot(plot_df, aes(x = lineage
# , fill = factor(sensitivity))) +
# geom_bar(stat = 'count')+
# #coord_cartesian(ylim = c(0, ypos_label)) +
# facet_wrap(~mutationinformation
# , scales = 'free_y')
#########################################################
#================================================
@ -175,22 +197,46 @@ if (nrow(lin_muts_dfM) == nrow(plot_df) ){
#================================================
# Plot: with stats (plot_df2)
# TODO:
#1) Add gene name from variable as plot title. <Placeholder provided>
#2) Add: facet_wrap_paginate () to allow graphs to span over multiple pages
#1) DONE: Add gene name from variable as plot title. <Placeholder provided>
#2) DONE: Add: facet_wrap_paginate () to allow graphs to span over multiple pages
#3) Add *: Extend yaxis for each plot to allow geom_label to have space (or see
# if this self resolving with facet_wrap_paginate())
#================================================
p_title = "<Insert gene>"
plot_pages = round(length(lin_muts)/25)
p_title = gene
res = 144 # SVG dots-per-inch
sapply(1:plot_pages, function(page){
print(paste0("Plotting page:", page))
svglite(paste0("/tmp/output-",page,".svg"), width=2048/res, height=1534/res) # old-school square 4:3 CRT shape 1.3:1
print(
ggplot(plot_df2, aes(x = lineage
, fill = sensitivity)) +
geom_bar(stat = 'count') +
facet_wrap(~mutationinformation
, scales = 'free_y') +
theme(legend.position = "top")+
labs(title = p_title) +
facet_wrap_paginate(~mutationinformation
, scales = 'free_y'
, ncol = 5
, nrow = 5
, page = page) +
theme(legend.position = "top"
, plot.title = element_text(hjust = 0.5, size=20)
, strip.text = element_text(size=14)
, axis.text.x = element_text(size=14)
, axis.text.y = element_text(size=14)
, axis.title.y = element_text(size=14)
, legend.title = element_blank()
, axis.title.x = element_blank()
)+
labs(title = paste0(p_title, ": sensitivity by lineage")
, y = 'Sample Count'
) +
#geom_text(aes(label = p.value, x = 0.5, y = 5))
geom_label(aes(label = paste0("p=",pvalF), x = 2.5, ypos_label+1), fill="white")# +
geom_blank(aes(y = ypos_label+2)) +
geom_label(aes(label = pvalRF, x = 2.5, ypos_label+0.75), fill="white")
)
dev.off()
}
)
#geom_text(aes(label = paste0("p=",pvalF), x = 2.5, ypos_label+1))# +
#geom_segment(aes(x = 1, y = ypos_label+0.5, xend = 4, yend = ypos_label+0.5))