LINEAGE2 plots: p value stars
This commit is contained in:
parent
087170a798
commit
8965bee5d6
1 changed files with 75 additions and 29 deletions
|
@ -4,15 +4,20 @@ library("ggforce")
|
||||||
#install.packages("gginference")
|
#install.packages("gginference")
|
||||||
library(gginference)
|
library(gginference)
|
||||||
library(ggpubr)
|
library(ggpubr)
|
||||||
|
library(svglite)
|
||||||
##################################################
|
##################################################
|
||||||
#%% read data
|
#%% read data
|
||||||
# TODO: read data using gene and drug combination
|
# DOME: read data using gene and drug combination
|
||||||
# gene must be lowercase
|
# gene must be lowercase
|
||||||
# tolower(gene)
|
# tolower(gene)
|
||||||
#################################################
|
#################################################
|
||||||
|
gene="pncA"
|
||||||
|
drug="pyrazinamide"
|
||||||
|
|
||||||
|
lineage_filename=paste0(tolower(gene),"_merged_df2.csv")
|
||||||
|
lineage_data_path="~/git/Data/pyrazinamide/output"
|
||||||
|
|
||||||
df = read.csv("/home/tanu/git/Data/pyrazinamide/output/pnca_merged_df2.csv")
|
df = read.csv(paste0(lineage_data_path,"/",lineage_filename))
|
||||||
#df2 = read.csv("/home/tanu/git/Data/pyrazinamide/output/pnca_merged_df3.csv")
|
#df2 = read.csv("/home/tanu/git/Data/pyrazinamide/output/pnca_merged_df3.csv")
|
||||||
|
|
||||||
foo = as.data.frame(colnames(df))
|
foo = as.data.frame(colnames(df))
|
||||||
|
@ -96,33 +101,50 @@ cat("\nnrow of plot_df:", nrow(plot_df))
|
||||||
#-----------------------------------------------
|
#-----------------------------------------------
|
||||||
plot_df$pval = NULL
|
plot_df$pval = NULL
|
||||||
for (i in lin_muts) {
|
for (i in lin_muts) {
|
||||||
print (i)
|
#print (i)
|
||||||
s_mut = plot_df[plot_df$mutationinformation == i,]
|
s_mut = plot_df[plot_df$mutationinformation == i,]
|
||||||
print(s_mut)
|
#print(s_mut)
|
||||||
s_tab = table(s_mut$lineage, s_mut$sensitivity)
|
s_tab = table(s_mut$lineage, s_mut$sensitivity)
|
||||||
print(s_tab)
|
#print(s_tab)
|
||||||
ft_pvalue_i = round(fisher.test(s_tab)$p.value, 3)
|
#ft_pvalue_i = round(fisher.test(s_tab)$p.value, 3)
|
||||||
|
ft_pvalue_i = fisher.test(s_tab)$p.value
|
||||||
print(ft_pvalue_i)
|
#print(ft_pvalue_i)
|
||||||
plot_df$pval[plot_df$mutationinformation == i] <- ft_pvalue_i
|
plot_df$pval[plot_df$mutationinformation == i] <- ft_pvalue_i
|
||||||
#print(s_tab)
|
#print(s_tab)
|
||||||
}
|
}
|
||||||
|
plot_df$pvalR = round(plot_df$pval, 3)
|
||||||
|
|
||||||
|
plot_df$pvalRF = ifelse(plot_df$pvalR == 0.05, paste0("p=",plot_df$pvalR, "."), plot_df$pvalR )
|
||||||
|
plot_df$pvalRF = ifelse(plot_df$pvalR <= 0.05, paste0("p=",plot_df$pvalR, "*"), plot_df$pvalRF )
|
||||||
|
plot_df$pvalRF = ifelse(plot_df$pvalR <= 0.01, paste0("p=",plot_df$pvalR, "**"), plot_df$pvalRF )
|
||||||
|
plot_df$pvalRF = ifelse(plot_df$pvalR == 0, 'p<0.001, ***', plot_df$pvalRF)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
head(plot_df$pval)
|
head(plot_df$pval)
|
||||||
|
head(plot_df$pvalR)
|
||||||
|
View(plot_df$pvalRF)
|
||||||
|
|
||||||
# format p value
|
# format p value
|
||||||
# TODO: add case statement for correct pvalue formatting
|
# TODO: add case statement for correct pvalue formatting
|
||||||
plot_df$pvalF = ifelse(plot_df$pval < 0.05, paste0(plot_df$pval, "*"), plot_df$pval )
|
#plot_df$pvalF = ifelse(plot_df$pval <= 0.0001, paste0(round(plot_df$pval, 3), "**** "), plot_df$pval )
|
||||||
plot_df$pvalF
|
# plot_df$pvalF = ifelse(plot_df$pval <= 0.001, paste0(round(plot_df$pval, 3), "*** "), plot_df$pval )
|
||||||
|
# plot_df$pvalF = ifelse(plot_df$pval <= 0.01, paste0(round(plot_df$pval, 3), "** "), plot_df$pval )
|
||||||
|
# plot_df$pvalF = ifelse(plot_df$pval < 0.05, paste0(round(plot_df$pval, 3), "* "), plot_df$pval )
|
||||||
|
# plot_df$pvalF = ifelse(plot_df$pval == 0.05, paste0(round(plot_df$pval, 3), ". "), plot_df$pval )
|
||||||
|
|
||||||
|
round(plot_df$pvalF, 3)
|
||||||
|
|
||||||
|
|
||||||
#================================================
|
#================================================
|
||||||
# Plot attempt 1 [no stats]: WORKS beeautifully
|
# Plot attempt 1 [no stats]: WORKS beeautifully
|
||||||
#================================================
|
#================================================
|
||||||
ggplot(plot_df, aes(x = lineage
|
# ggplot(plot_df, aes(x = lineage
|
||||||
, fill = factor(sensitivity))) +
|
# , fill = factor(sensitivity))) +
|
||||||
geom_bar(stat = 'count')+
|
# geom_bar(stat = 'count')+
|
||||||
#coord_cartesian(ylim = c(0, ypos_label)) +
|
# #coord_cartesian(ylim = c(0, ypos_label)) +
|
||||||
facet_wrap(~mutationinformation
|
# facet_wrap(~mutationinformation
|
||||||
, scales = 'free_y')
|
# , scales = 'free_y')
|
||||||
|
|
||||||
#########################################################
|
#########################################################
|
||||||
#================================================
|
#================================================
|
||||||
|
@ -175,23 +197,47 @@ if (nrow(lin_muts_dfM) == nrow(plot_df) ){
|
||||||
#================================================
|
#================================================
|
||||||
# Plot: with stats (plot_df2)
|
# Plot: with stats (plot_df2)
|
||||||
# TODO:
|
# TODO:
|
||||||
#1) Add gene name from variable as plot title. <Placeholder provided>
|
#1) DONE: Add gene name from variable as plot title. <Placeholder provided>
|
||||||
#2) Add: facet_wrap_paginate () to allow graphs to span over multiple pages
|
#2) DONE: Add: facet_wrap_paginate () to allow graphs to span over multiple pages
|
||||||
#3) Add *: Extend yaxis for each plot to allow geom_label to have space (or see
|
#3) Add *: Extend yaxis for each plot to allow geom_label to have space (or see
|
||||||
# if this self resolving with facet_wrap_paginate())
|
# if this self resolving with facet_wrap_paginate())
|
||||||
#================================================
|
#================================================
|
||||||
p_title = "<Insert gene>"
|
plot_pages = round(length(lin_muts)/25)
|
||||||
|
p_title = gene
|
||||||
|
res = 144 # SVG dots-per-inch
|
||||||
|
|
||||||
ggplot(plot_df2, aes(x = lineage
|
sapply(1:plot_pages, function(page){
|
||||||
, fill = sensitivity)) +
|
print(paste0("Plotting page:", page))
|
||||||
geom_bar(stat = 'count') +
|
svglite(paste0("/tmp/output-",page,".svg"), width=2048/res, height=1534/res) # old-school square 4:3 CRT shape 1.3:1
|
||||||
facet_wrap(~mutationinformation
|
print(
|
||||||
, scales = 'free_y') +
|
ggplot(plot_df2, aes(x = lineage
|
||||||
theme(legend.position = "top")+
|
, fill = sensitivity)) +
|
||||||
labs(title = p_title) +
|
geom_bar(stat = 'count') +
|
||||||
#geom_text(aes(label = p.value, x = 0.5, y = 5))
|
facet_wrap_paginate(~mutationinformation
|
||||||
geom_label(aes(label = paste0("p=",pvalF), x = 2.5, ypos_label+1), fill="white")# +
|
, scales = 'free_y'
|
||||||
#geom_text(aes(label = paste0("p=",pvalF), x = 2.5, ypos_label+1))# +
|
, ncol = 5
|
||||||
|
, nrow = 5
|
||||||
|
, page = page) +
|
||||||
|
theme(legend.position = "top"
|
||||||
|
, plot.title = element_text(hjust = 0.5, size=20)
|
||||||
|
, strip.text = element_text(size=14)
|
||||||
|
, axis.text.x = element_text(size=14)
|
||||||
|
, axis.text.y = element_text(size=14)
|
||||||
|
, axis.title.y = element_text(size=14)
|
||||||
|
, legend.title = element_blank()
|
||||||
|
, axis.title.x = element_blank()
|
||||||
|
)+
|
||||||
|
labs(title = paste0(p_title, ": sensitivity by lineage")
|
||||||
|
, y = 'Sample Count'
|
||||||
|
) +
|
||||||
|
#geom_text(aes(label = p.value, x = 0.5, y = 5))
|
||||||
|
geom_blank(aes(y = ypos_label+2)) +
|
||||||
|
geom_label(aes(label = pvalRF, x = 2.5, ypos_label+0.75), fill="white")
|
||||||
|
)
|
||||||
|
dev.off()
|
||||||
|
}
|
||||||
|
)
|
||||||
|
#geom_text(aes(label = paste0("p=",pvalF), x = 2.5, ypos_label+1))# +
|
||||||
|
|
||||||
#geom_segment(aes(x = 1, y = ypos_label+0.5, xend = 4, yend = ypos_label+0.5))
|
#geom_segment(aes(x = 1, y = ypos_label+0.5, xend = 4, yend = ypos_label+0.5))
|
||||||
#geom_hline(data = lin_muts_dfM, aes(yintercept=ypos_label+0.5))
|
#geom_hline(data = lin_muts_dfM, aes(yintercept=ypos_label+0.5))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue