lineage plots: be a function and then run over all pairs
This commit is contained in:
parent
8965bee5d6
commit
cbfa9ff31b
2 changed files with 193 additions and 7 deletions
|
@ -118,12 +118,7 @@ plot_df$pvalRF = ifelse(plot_df$pvalR == 0.05, paste0("p=",plot_df$pvalR, "."),
|
|||
plot_df$pvalRF = ifelse(plot_df$pvalR <= 0.05, paste0("p=",plot_df$pvalR, "*"), plot_df$pvalRF )
|
||||
plot_df$pvalRF = ifelse(plot_df$pvalR <= 0.01, paste0("p=",plot_df$pvalR, "**"), plot_df$pvalRF )
|
||||
plot_df$pvalRF = ifelse(plot_df$pvalR == 0, 'p<0.001, ***', plot_df$pvalRF)
|
||||
|
||||
|
||||
|
||||
head(plot_df$pval)
|
||||
head(plot_df$pvalR)
|
||||
View(plot_df$pvalRF)
|
||||
plot_df$pvalRF = ifelse(plot_df$pvalR > 0.05, paste0("p=",plot_df$pvalR), plot_df$pvalRF)
|
||||
|
||||
# format p value
|
||||
# TODO: add case statement for correct pvalue formatting
|
||||
|
@ -231,7 +226,7 @@ sapply(1:plot_pages, function(page){
|
|||
, y = 'Sample Count'
|
||||
) +
|
||||
#geom_text(aes(label = p.value, x = 0.5, y = 5))
|
||||
geom_blank(aes(y = ypos_label+2)) +
|
||||
geom_blank(aes(y = ypos_label+1.25)) +
|
||||
geom_label(aes(label = pvalRF, x = 2.5, ypos_label+0.75), fill="white")
|
||||
)
|
||||
dev.off()
|
||||
|
|
191
scripts/plotting/lineage_plots_multipage.R
Normal file
191
scripts/plotting/lineage_plots_multipage.R
Normal file
|
@ -0,0 +1,191 @@
|
|||
library(tidyverse)
|
||||
library("ggforce")
|
||||
library(gginference)
|
||||
library(ggpubr)
|
||||
library(svglite)
|
||||
|
||||
# for testing only
|
||||
gene="pncA"
|
||||
drug="pyrazinamide"
|
||||
|
||||
lineage_plot=function(gene,drug){
|
||||
lineage_filename=paste0(tolower(gene),"_merged_df2.csv")
|
||||
lineage_data_path=paste0("~/git/Data/", drug, "/output") # NARSTY
|
||||
full_file_path = paste0(lineage_data_path,"/",lineage_filename)
|
||||
print(paste0("Loading: ",full_file_path))
|
||||
df = read.csv(full_file_path)
|
||||
#df2 = read.csv("/home/tanu/git/Data/pyrazinamide/output/pnca_merged_df3.csv")
|
||||
|
||||
foo = as.data.frame(colnames(df))
|
||||
|
||||
cols_to_subset = c('mutationinformation'
|
||||
, 'snp_frequency'
|
||||
, 'pos_count'
|
||||
, 'position'
|
||||
, 'lineage'
|
||||
, 'lineage_multimode'
|
||||
, 'dst'
|
||||
, 'dst_multimode'
|
||||
#, 'dst_multimode_all'
|
||||
, 'dst_mode')
|
||||
|
||||
my_df = df[ ,cols_to_subset]
|
||||
#df2 = df2[ ,cols_to_subset]
|
||||
|
||||
# Now we need to make a column that fill na in dst with value of dst_mode
|
||||
my_df$dst2 = ifelse(is.na(my_df$dst), my_df$dst_mode, my_df$dst)
|
||||
|
||||
#%% create sensitivity column ~ dst_mode
|
||||
my_df$sensitivity = ifelse(my_df$dst2 == 1, "R", "S")
|
||||
table(my_df$dst2)
|
||||
if ( table(my_df$sensitivity)[2] == table(my_df$dst2)[1] && table(my_df$sensitivity)[1] == table(my_df$dst2)[2] ){
|
||||
cat("\nProceeding with lineage resistance plots")
|
||||
}else{
|
||||
stop("FAIL: could not verify dst2 and sensitivity numbers")
|
||||
}
|
||||
|
||||
#%%
|
||||
# select only L1-L4 and LBOV
|
||||
sel_lineages1 = c("LBOV", "")
|
||||
my_df2 = my_df[!my_df$lineage%in%sel_lineages1,]
|
||||
table(my_df2$lineage)
|
||||
|
||||
sel_lineages2 = c("L1", "L2", "L3", "L4")
|
||||
my_df2 = my_df2[my_df2$lineage%in%sel_lineages2,]
|
||||
table(my_df2$lineage)
|
||||
|
||||
sum(table(my_df2$lineage)) == nrow(my_df2)
|
||||
table(my_df2$lineage)
|
||||
|
||||
#%% get only muts which belong to > 1 lineage and have different sensitivity classifications
|
||||
muts = unique(my_df2$mutationinformation)
|
||||
|
||||
# step1 : get muts with more than one lineage
|
||||
lin_muts = NULL
|
||||
for (i in muts) {
|
||||
print (i)
|
||||
s_mut = my_df2[my_df2$mutationinformation == i,]
|
||||
s_tab = table(s_mut$lineage, s_mut$sensitivity)
|
||||
#print(s_tab)
|
||||
if (dim(s_tab)[1] > 1 && dim(s_tab)[2] > 1){
|
||||
lin_muts = c(lin_muts, i)
|
||||
}
|
||||
}
|
||||
cat("\nGot:", length(lin_muts), "mutations belonging to >1 lineage with differing drug sensitivities")
|
||||
|
||||
# step 2: subset these muts for plotting
|
||||
plot_df = my_df2[my_df2$mutationinformation%in%lin_muts,]
|
||||
|
||||
cat("\nnrow of plot_df:", nrow(plot_df))
|
||||
|
||||
# step 3: Add p-value
|
||||
plot_df$pval = NULL
|
||||
for (i in lin_muts) {
|
||||
#print (i)
|
||||
s_mut = plot_df[plot_df$mutationinformation == i,]
|
||||
s_tab = table(s_mut$lineage, s_mut$sensitivity)
|
||||
ft_pvalue_i = fisher.test(s_tab)$p.value
|
||||
plot_df$pval[plot_df$mutationinformation == i] <- ft_pvalue_i
|
||||
}
|
||||
plot_df$pvalR = round(plot_df$pval, 3)
|
||||
|
||||
plot_df$pvalRF = ifelse(plot_df$pvalR == 0.05, paste0("p=",plot_df$pvalR, "."), plot_df$pvalR )
|
||||
plot_df$pvalRF = ifelse(plot_df$pvalR <= 0.05, paste0("p=",plot_df$pvalR, "*"), plot_df$pvalRF )
|
||||
plot_df$pvalRF = ifelse(plot_df$pvalR <= 0.01, paste0("p=",plot_df$pvalR, "**"), plot_df$pvalRF )
|
||||
plot_df$pvalRF = ifelse(plot_df$pvalR == 0, 'p<0.001, ***', plot_df$pvalRF)
|
||||
plot_df$pvalRF = ifelse(plot_df$pvalR > 0.05, paste0("p=",plot_df$pvalR), plot_df$pvalRF)
|
||||
|
||||
# format p value
|
||||
lin_muts_tb = plot_df %>%
|
||||
group_by(mutationinformation) %>%
|
||||
count(lineage) %>%
|
||||
mutate(ypos_label = max(n))
|
||||
|
||||
head(lin_muts_tb); class(lin_muts_tb)
|
||||
lin_muts_df = as.data.frame(lin_muts_tb)
|
||||
class(lin_muts_df)
|
||||
|
||||
intersect(names(plot_df), names(lin_muts_df))
|
||||
|
||||
select_cols = c("mutationinformation", "ypos_label")
|
||||
lin_muts_df2 = lin_muts_df[, select_cols]
|
||||
names(lin_muts_df2) ; head(lin_muts_df2)
|
||||
|
||||
# remove duplicates before merging
|
||||
lin_muts_df2U = lin_muts_df2[!duplicated(lin_muts_df2),]
|
||||
class(lin_muts_df2); class(plot_df); class(lin_muts_df2U)
|
||||
|
||||
lin_muts_dfM = merge(plot_df, lin_muts_df2U, by = "mutationinformation", all.y = T)
|
||||
|
||||
if (nrow(lin_muts_dfM) == nrow(plot_df) ){
|
||||
cat("\nPASS: plot_df now has ypos for label"
|
||||
, "\nGenerating plot_df2 with sensitivity as factor\n")
|
||||
str(lin_muts_dfM)
|
||||
lin_muts_dfM$sensitivity = as.factor(lin_muts_dfM$sensitivity)
|
||||
plot_df2 = lin_muts_dfM
|
||||
|
||||
}else{
|
||||
stop("\nSomething went wrong. ypos_label could not be generated")
|
||||
}
|
||||
|
||||
|
||||
# Do plots
|
||||
plot_pages = round(length(lin_muts)/25)
|
||||
p_title = gene
|
||||
res = 144 # SVG dots-per-inch
|
||||
|
||||
sapply(1:plot_pages, function(page){
|
||||
print(paste0("Plotting page:", page))
|
||||
svglite(paste0("/tmp/",drug,"-",page,".svg"), width=2048/res, height=1534/res) # old-school square 4:3 CRT shape 1.3:1
|
||||
print(
|
||||
ggplot(plot_df2, aes(x = lineage
|
||||
, fill = sensitivity)) +
|
||||
geom_bar(stat = 'count') +
|
||||
facet_wrap_paginate(~mutationinformation
|
||||
, scales = 'free_y'
|
||||
, ncol = 5
|
||||
, nrow = 5
|
||||
, page = page) +
|
||||
theme(legend.position = "top"
|
||||
, plot.title = element_text(hjust = 0.5, size=20)
|
||||
, strip.text = element_text(size=14)
|
||||
, axis.text.x = element_text(size=14)
|
||||
, axis.text.y = element_text(size=14)
|
||||
, axis.title.y = element_text(size=14)
|
||||
, legend.title = element_blank()
|
||||
, axis.title.x = element_blank()
|
||||
)+
|
||||
labs(title = paste0(p_title, ": sensitivity by lineage")
|
||||
, y = 'Sample Count'
|
||||
) +
|
||||
#geom_text(aes(label = p.value, x = 0.5, y = 5))
|
||||
geom_blank(aes(y = ypos_label+1.25)) +
|
||||
geom_label(aes(label = pvalRF, x = 2.5, ypos_label+0.75), fill="white")
|
||||
)
|
||||
dev.off()
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
# hardcoded list of drugs
|
||||
drugs = c(#"ethambutol",
|
||||
#"isoniazid",
|
||||
"pyrazinamide",
|
||||
"rifampicin",
|
||||
"streptomycin",
|
||||
"cycloserine")
|
||||
genes = c(#"embB",
|
||||
#"katG",
|
||||
"pncA",
|
||||
"rpoB",
|
||||
"gid",
|
||||
"alr")
|
||||
combo = data.frame(drugs, genes)
|
||||
|
||||
#sapply(combo$drugs, function(x){print(c(x,combo[drugs==x,"genes"]))})
|
||||
|
||||
# generate graphs for all drug/gene combinations in "combo"
|
||||
sapply(combo$drugs, function(drug){
|
||||
gene=combo[drugs==drug,"genes"]
|
||||
lineage_plot(gene,drug)
|
||||
})
|
Loading…
Add table
Add a link
Reference in a new issue