generated embb lineage plots
This commit is contained in:
parent
c8f3ddf892
commit
d984d283c5
3 changed files with 208 additions and 64 deletions
|
@ -11,46 +11,55 @@ library(svglite)
|
|||
# gene must be lowercase
|
||||
# tolower(gene)
|
||||
#################################################
|
||||
gene="pncA"
|
||||
drug="pyrazinamide"
|
||||
#gene="pncA"
|
||||
#drug="pyrazinamide"
|
||||
|
||||
lineage_filename=paste0(tolower(gene),"_merged_df2.csv")
|
||||
lineage_data_path="~/git/Data/pyrazinamide/output"
|
||||
#lineage_filename=paste0(tolower(gene),"_merged_df2.csv")
|
||||
#lineage_data_path="~/git/Data/pyrazinamide/output"
|
||||
|
||||
df = read.csv(paste0(lineage_data_path,"/",lineage_filename))
|
||||
#df2 = read.csv("/home/tanu/git/Data/pyrazinamide/output/pnca_merged_df3.csv")
|
||||
df2 = read.csv(paste0(lineage_data_path,"/",lineage_filename))
|
||||
|
||||
foo = as.data.frame(colnames(df))
|
||||
foo = as.data.frame(colnames(df2))
|
||||
|
||||
cols_to_subset = c('mutationinformation'
|
||||
, 'snp_frequency'
|
||||
, 'pos_count'
|
||||
, 'position'
|
||||
, 'lineage'
|
||||
, 'lineage_multimode'
|
||||
, "sensitivity"
|
||||
#, 'lineage_multimode'
|
||||
, 'dst'
|
||||
, 'dst_multimode'
|
||||
, 'dst2'
|
||||
#, 'dst_multimode'
|
||||
#, 'dst_multimode_all'
|
||||
, 'dst_mode')
|
||||
|
||||
my_df = df[ ,cols_to_subset]
|
||||
#df2 = df2[ ,cols_to_subset]
|
||||
#cols_to_subset%in%foo
|
||||
|
||||
r24p_embb = df_embb[df_embb$mutationinformation == "R24P",]
|
||||
my_df = df2[ ,cols_to_subset]
|
||||
|
||||
tm = c("A102P", "M1T")
|
||||
test = my_df[my_df$mutationinformation%in%tm,]
|
||||
#test$dst2[is.na(test$dst)] <-test$dst_mode
|
||||
test$dst2 = ifelse(is.na(test$dst), test$dst_mode, test$dst)
|
||||
sum(table(test$dst2)) == nrow(test)
|
||||
# r24p_embb = df_embb[df_embb$mutationinformation == "R24P",]
|
||||
# #tm = c("A102P", "M1T")
|
||||
# test = my_df[my_df$mutationinformation%in%tm,]
|
||||
# #test$dst2[is.na(test$dst)] <-test$dst_mode
|
||||
# test$dst2 = ifelse(is.na(test$dst), test$dst_mode, test$dst)
|
||||
# sum(table(test$dst2)) == nrow(test)
|
||||
|
||||
|
||||
# already exist now
|
||||
#---------------------------------
|
||||
# Now we need to make a column that fill na in dst with value of dst_mode
|
||||
my_df$dst2 = ifelse(is.na(my_df$dst), my_df$dst_mode, my_df$dst)
|
||||
#my_df$dst2_check = ifelse(is.na(my_df$dst), my_df$dst_mode, my_df$dst)
|
||||
#all(my_df$dst2_check ==my_df$dst2)
|
||||
|
||||
#%% create sensitivity column ~ dst_mode
|
||||
my_df$sensitivity = ifelse(my_df$dst2 == 1, "R", "S")
|
||||
#%% create sensitivity column ~ dst2[revised]
|
||||
my_df$sens2 = ifelse(my_df$dst2 == 1, "R", "S")
|
||||
|
||||
#---------------------------------
|
||||
table(my_df$sens2)
|
||||
table(my_df$dst2)
|
||||
if ( table(my_df$sensitivity)[2] == table(my_df$dst2)[1] && table(my_df$sensitivity)[1] == table(my_df$dst2)[2] ){
|
||||
|
||||
if ( table(my_df$sens2 )[2] == table(my_df$dst2)[1] && table(my_df$sens2 )[1] == table(my_df$dst2)[2] ){
|
||||
cat("\nProceeding with lineage resistance plots")
|
||||
}else{
|
||||
stop("FAIL: could not verify dst2 and sensitivity numbers")
|
||||
|
@ -72,30 +81,39 @@ table(my_df2$lineage)
|
|||
# %%
|
||||
# str(my_df2)
|
||||
# my_df2$lineage = as.factor(my_df2$lineage)
|
||||
# my_df2$sensitivity = as.factor(my_df2$sensitivity)
|
||||
# my_df2$sens2 = as.factor(my_df2$sens2)
|
||||
|
||||
#%% get only muts which belong to > 1 lineage and have different sensitivity classifications
|
||||
muts = unique(my_df2$mutationinformation)
|
||||
#-----------------------------------------------
|
||||
# step1 : get muts with more than one lineage
|
||||
# step 0 : get muts with more than one lineage
|
||||
#-----------------------------------------------
|
||||
lin_muts = NULL
|
||||
for (i in muts) {
|
||||
print (i)
|
||||
s_mut = my_df2[my_df2$mutationinformation == i,]
|
||||
s_tab = table(s_mut$lineage, s_mut$sensitivity)
|
||||
s_tab = table(s_mut$lineage, s_mut$sens2)
|
||||
#print(s_tab)
|
||||
if (dim(s_tab)[1] > 1 && dim(s_tab)[2] > 1){
|
||||
lin_muts = c(lin_muts, i)
|
||||
}
|
||||
}
|
||||
cat("\nGot:", length(lin_muts), "mutations belonging to >1 lineage with differing drug sensitivities")
|
||||
|
||||
|
||||
#-----------------------------------------------
|
||||
# step 1 : get other muts that do not have this
|
||||
#-----------------------------------------------
|
||||
consist_muts = muts[!muts%in%lin_muts]
|
||||
cat("\nGot:", length(consist_muts), "mutations that are consistent")
|
||||
|
||||
#-----------------------------------------------
|
||||
# step 2: subset these muts for plotting
|
||||
#-----------------------------------------------
|
||||
plot_df = my_df2[my_df2$mutationinformation%in%lin_muts,]
|
||||
|
||||
cat("\nnrow of plot_df:", nrow(plot_df))
|
||||
|
||||
#-----------------------------------------------
|
||||
# step 3: Add p-value
|
||||
#-----------------------------------------------
|
||||
|
@ -104,7 +122,7 @@ for (i in lin_muts) {
|
|||
#print (i)
|
||||
s_mut = plot_df[plot_df$mutationinformation == i,]
|
||||
#print(s_mut)
|
||||
s_tab = table(s_mut$lineage, s_mut$sensitivity)
|
||||
s_tab = table(s_mut$lineage, s_mut$sens2)
|
||||
#print(s_tab)
|
||||
#ft_pvalue_i = round(fisher.test(s_tab)$p.value, 3)
|
||||
ft_pvalue_i = fisher.test(s_tab)$p.value
|
||||
|
@ -114,11 +132,30 @@ for (i in lin_muts) {
|
|||
}
|
||||
plot_df$pvalR = round(plot_df$pval, 3)
|
||||
|
||||
plot_df$pvalRF = ifelse(plot_df$pvalR == 0.05, paste0("p=",plot_df$pvalR, "."), plot_df$pvalR )
|
||||
plot_df$pvalRF = ifelse(plot_df$pvalR <= 0.05, paste0("p=",plot_df$pvalR, "*"), plot_df$pvalRF )
|
||||
plot_df$pvalRF = ifelse(plot_df$pvalR <= 0.01, paste0("p=",plot_df$pvalR, "**"), plot_df$pvalRF )
|
||||
plot_df$pvalRF = ifelse(plot_df$pvalR == 0, 'p<0.001, ***', plot_df$pvalRF)
|
||||
plot_df$pvalRF = ifelse(plot_df$pvalR > 0.05, paste0("p=",plot_df$pvalR), plot_df$pvalRF)
|
||||
# plot_df$pvalRF = ifelse(plot_df$pvalR == 0.05, paste0("p=",plot_df$pvalR, "."), plot_df$pvalR )
|
||||
# plot_df$pvalRF = ifelse(plot_df$pvalR <= 0.05, paste0("p=",plot_df$pvalR, "*"), plot_df$pvalRF )
|
||||
# plot_df$pvalRF = ifelse(plot_df$pvalR <= 0.01, paste0("p=",plot_df$pvalR, "**"), plot_df$pvalRF )
|
||||
# plot_df$pvalRF = ifelse(plot_df$pvalR == 0, 'p<0.001, ***', plot_df$pvalRF)
|
||||
# plot_df$pvalRF = ifelse(plot_df$pvalR > 0.05, paste0("p=",plot_df$pvalR), plot_df$pvalRF)
|
||||
# plot_df
|
||||
# plot_df$pvalRF_old = plot_df$pvalRF
|
||||
|
||||
plot_df$pvalRF = plot_df$pvalR
|
||||
plot_df = dplyr::mutate(plot_df
|
||||
, pvalRF = case_when(pvalRF == 0.05 ~ "P ."
|
||||
, pvalRF <=0.0001 ~ 'P ****'
|
||||
, pvalRF <=0.001 ~ 'P ***'
|
||||
, pvalRF <=0.01 ~ 'P **'
|
||||
, pvalRF <0.05 ~ 'P *'
|
||||
, TRUE ~ 'ns'))
|
||||
|
||||
|
||||
plot_df
|
||||
|
||||
head(plot_df)
|
||||
table(plot_df$pvalR<0.05)
|
||||
|
||||
|
||||
|
||||
# format p value
|
||||
# TODO: add case statement for correct pvalue formatting
|
||||
|
@ -149,8 +186,8 @@ plot_df$pvalRF = ifelse(plot_df$pvalR > 0.05, paste0("p=",plot_df$pvalR), plot_d
|
|||
#
|
||||
# # Calculate stats: example
|
||||
# test2 = plot_df[plot_df$mutationinformation%in%tm2,]
|
||||
# table(test2$mutationinformation, test2$lineage, test2$sensitivity)
|
||||
# tm_tab = table(test2$lineage, test2$sensitivity)
|
||||
# table(test2$mutationinformation, test2$lineage, test2$sens2)
|
||||
# tm_tab = table(test2$lineage, test2$sens2)
|
||||
# tm_tab
|
||||
|
||||
# Get the ypos for plotting the label for p-value
|
||||
|
@ -179,13 +216,26 @@ if (nrow(lin_muts_dfM) == nrow(plot_df) ){
|
|||
cat("\nPASS: plot_df now has ypos for label"
|
||||
, "\nGenerating plot_df2 with sensitivity as factor\n")
|
||||
str(lin_muts_dfM)
|
||||
lin_muts_dfM$sensitivity = as.factor(lin_muts_dfM$sensitivity)
|
||||
lin_muts_dfM$sens2 = as.factor(lin_muts_dfM$sens2)
|
||||
plot_df2 = lin_muts_dfM
|
||||
|
||||
}else{
|
||||
stop("\nSomething went wrong. ypos_label could not be generated")
|
||||
}
|
||||
|
||||
|
||||
# sig muts
|
||||
plot_df_sig = plot_df2[plot_df2$pvalR<0.05,]
|
||||
sig_muts = length(unique(plot_df_sig$mutationinformation))
|
||||
cat("\nGot:", sig_muts, "mutations that are significant")
|
||||
|
||||
|
||||
plot_df_ns = plot_df2[plot_df2$pvalR>0.05,]
|
||||
ns_muts = length(unique(plot_df_ns$mutationinformation))
|
||||
cat("\nGot:", ns_muts, "mutations that are NOT significant")
|
||||
p_title = gene
|
||||
ts = 8
|
||||
gls = 3
|
||||
#================================================
|
||||
# Plot: with stats (plot_df2)
|
||||
# TODO:
|
||||
|
@ -194,41 +244,94 @@ if (nrow(lin_muts_dfM) == nrow(plot_df) ){
|
|||
#3) Add *: Extend yaxis for each plot to allow geom_label to have space (or see
|
||||
# if this self resolving with facet_wrap_paginate())
|
||||
#================================================
|
||||
plot_pages = round(length(lin_muts)/25)
|
||||
p_title = gene
|
||||
res = 144 # SVG dots-per-inch
|
||||
|
||||
sapply(1:plot_pages, function(page){
|
||||
print(paste0("Plotting page:", page))
|
||||
svglite(paste0("/tmp/output-",page,".svg"), width=2048/res, height=1534/res) # old-school square 4:3 CRT shape 1.3:1
|
||||
print(
|
||||
ggplot(plot_df2, aes(x = lineage
|
||||
, fill = sensitivity)) +
|
||||
#svg(paste0(outdir_images, "embb_linDS.svg"), width = 6, height = 10 ) # old-school square 4:3 CRT shape 1.3:1
|
||||
ds_s = ggplot(plot_df_sig, aes(x = lineage
|
||||
, fill = sens2)) +
|
||||
geom_bar(stat = 'count') +
|
||||
facet_wrap_paginate(~mutationinformation
|
||||
scale_fill_manual(name = ""
|
||||
# name = leg_title
|
||||
, values = c("red", "blue")
|
||||
#, labels = levels(sens2))
|
||||
)+
|
||||
facet_wrap(~mutationinformation
|
||||
, scales = 'free_y'
|
||||
, ncol = 5
|
||||
, nrow = 5
|
||||
, page = page) +
|
||||
theme(legend.position = "top"
|
||||
, plot.title = element_text(hjust = 0.5, size=20)
|
||||
, strip.text = element_text(size=14)
|
||||
, axis.text.x = element_text(size=14)
|
||||
, axis.text.y = element_text(size=14)
|
||||
, axis.title.y = element_text(size=14)
|
||||
, ncol = 3
|
||||
, nrow = 4
|
||||
) +
|
||||
theme(legend.position = "none" #top
|
||||
#, plot.title = element_text(hjust = 0.5, size=15)
|
||||
, plot.title = element_blank()
|
||||
|
||||
, strip.text = element_text(size=ts+2)
|
||||
, axis.text.x = element_text(size=ts)
|
||||
, axis.text.y = element_text(size=ts)
|
||||
, axis.title.y = element_text(size=ts)
|
||||
, legend.title = element_blank()
|
||||
, axis.title.x = element_blank()
|
||||
)+
|
||||
labs(title = paste0(p_title, ": sensitivity by lineage")
|
||||
, y = 'Sample Count'
|
||||
) +
|
||||
#geom_text(aes(label = p.value, x = 0.5, y = 5))
|
||||
, y = 'Sample Count') +
|
||||
#geom_text(aes(label = pvalRF, x = 2.5, y = ypos_label+0.75))
|
||||
geom_blank(aes(y = ypos_label+1.25)) +
|
||||
geom_label(aes(label = pvalRF, x = 2.5, ypos_label+0.75), fill="white")
|
||||
)
|
||||
dev.off()
|
||||
}
|
||||
)
|
||||
geom_label(aes(label = pvalRF, x = 2.5, ypos_label+0.75), fill="white", size =gls)
|
||||
|
||||
#dev.off()
|
||||
|
||||
###################################
|
||||
#ns muts
|
||||
|
||||
#svg(paste0(outdir_images, "embb_linDS_ns.svg"), width =10 , height = 8) # old-school square 4:3 CRT shape 1.3:1
|
||||
ds_ns = ggplot(plot_df_ns, aes(x = lineage
|
||||
, fill = sens2)) +
|
||||
geom_bar(stat = 'count') +
|
||||
scale_fill_manual(name = ""
|
||||
# name = leg_title
|
||||
, values = c("red", "blue")
|
||||
#, labels = levels(sens2))
|
||||
)+
|
||||
|
||||
facet_wrap(~mutationinformation
|
||||
, scales = 'free_y'
|
||||
#, ncol = 5
|
||||
#, nrow = 5
|
||||
) +
|
||||
theme(legend.position = "none" #top
|
||||
#, plot.title = element_text(hjust = 0.5, size=20)
|
||||
, plot.title = element_blank()
|
||||
|
||||
, strip.text = element_text(size=ts)
|
||||
, axis.text.x = element_text(size=ts)
|
||||
, axis.text.y = element_text(size=ts)
|
||||
, axis.title.y = element_text(size=ts)
|
||||
, legend.title = element_blank()
|
||||
, axis.title.x = element_blank()
|
||||
)+
|
||||
labs(title = paste0(p_title, ": sensitivity by lineage")
|
||||
, y = 'Sample Count')
|
||||
#dev.off()
|
||||
|
||||
|
||||
# svg(paste0(outdir_images, "embb_linDS_CL.svg")
|
||||
# , width = 11
|
||||
# , height = 8 )
|
||||
png(paste0(outdir_images, "embb_linDS_CL.png")
|
||||
, width = 11.75
|
||||
, height = 8, units = "in", res = 300 )
|
||||
|
||||
cowplot::plot_grid(ds_s, ds_ns
|
||||
, ncol = 2
|
||||
,rel_widths = c(1,2)
|
||||
, labels = "AUTO")
|
||||
|
||||
dev.off()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#geom_text(aes(label = paste0("p=",pvalF), x = 2.5, ypos_label+1))# +
|
||||
|
||||
#geom_segment(aes(x = 1, y = ypos_label+0.5, xend = 4, yend = ypos_label+0.5))
|
||||
|
|
|
@ -21,6 +21,47 @@ geneL_normal = c("pnca")
|
|||
geneL_na = c("gid", "rpob")
|
||||
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
||||
|
||||
# LigDist_colname # from globals used
|
||||
# ppi2Dist_colname #from globals used
|
||||
# naDist_colname #from globals used
|
||||
|
||||
common_cols = c("mutationinformation"
|
||||
, "X5uhc_position"
|
||||
, "X5uhc_offset"
|
||||
, "position"
|
||||
, "dst_mode"
|
||||
, "mutation_info_labels"
|
||||
, "sensitivity", dist_columns )
|
||||
|
||||
|
||||
########################################
|
||||
categ_cols_to_factor = grep( "_outcome|_info", colnames(merged_df3) )
|
||||
fact_cols = colnames(merged_df3)[categ_cols_to_factor]
|
||||
|
||||
if (any(lapply(merged_df3[, fact_cols], class) == "character")){
|
||||
cat("\nChanging", length(categ_cols_to_factor), "cols to factor")
|
||||
merged_df3[, fact_cols] <- lapply(merged_df3[, fact_cols], as.factor)
|
||||
if (all(lapply(merged_df3[, fact_cols], class) == "factor")){
|
||||
cat("\nSuccessful: cols changed to factor")
|
||||
}
|
||||
}else{
|
||||
cat("\nRequested cols aready factors")
|
||||
}
|
||||
|
||||
cat("\ncols changed to factor are:\n", colnames(merged_df3)[categ_cols_to_factor] )
|
||||
|
||||
####################################
|
||||
# merged_df3: NECESSARY pre-processing
|
||||
###################################
|
||||
#df3 = merged_df3
|
||||
plot_cols = c("mutationinformation", "mutation_info_labels", "position", "dst_mode"
|
||||
, all_cols)
|
||||
|
||||
all_cols = c(common_cols
|
||||
, all_stability_cols
|
||||
, all_affinity_cols
|
||||
, all_conserv_cols)
|
||||
|
||||
|
||||
# counting
|
||||
foo = merged_df3[, c("mutationinformation"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue