From 5f441d09d9fca0c602765c2739d08437bca325bb Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Tue, 6 Oct 2020 16:33:25 +0100 Subject: [PATCH] added hist_af_or_combined.R to generate plots for output and moved previosu run to scratch_plots/ --- .../{hist_af.R => hist_af_or_combined.R} | 317 +++++++++++------- 1 file changed, 193 insertions(+), 124 deletions(-) rename scripts/plotting/{hist_af.R => hist_af_or_combined.R} (65%) mode change 100755 => 100644 diff --git a/scripts/plotting/hist_af.R b/scripts/plotting/hist_af_or_combined.R old mode 100755 new mode 100644 similarity index 65% rename from scripts/plotting/hist_af.R rename to scripts/plotting/hist_af_or_combined.R index e1bfd33..5119629 --- a/scripts/plotting/hist_af.R +++ b/scripts/plotting/hist_af_or_combined.R @@ -26,12 +26,28 @@ rm(my_df, upos, dup_muts, my_df_u_lig) # output #======= # plot 1 -hist_af_muts = "hist_af_mutations.svg" +hist_af_muts = "hist_mutations_AF.svg" plot_hist_af_muts = paste0(plotdir,"/", hist_af_muts) # plot 2 -hist_af_samples = "hist_af_samples.svg" -plot_hist_af_samples = paste0(plotdir, "/", hist_af_samples ) +hist_or_muts = "hist_mutations_OR.svg" +plot_hist_or_muts = paste0(plotdir,"/", hist_or_muts) + +# plot 3 +hist_af_muts_sample = "hist_af_muts_sample_combined.svg" +plot_hist_af_muts_sample = paste0(plotdir,"/", hist_af_muts_sample) + +# plot 4 +hist_af_or = "hist_af_or_combined.svg" +plot_hist_af_or = paste0(plotdir,"/", hist_af_or) + +# plot 5 +af_or_combined_med = "hist_bp_muts_combined_median.svg" +plot_af_or_combined_med = paste0(plotdir, "/", af_or_combined_med) + +# plot 6: without median line on hist +af_or_combined = "hist_bp_muts_combined.svg" +plot_af_or_combined = paste0(plotdir, "/", af_or_combined) #======================================================================= merged_df3_comp$mutation_info_labels = ifelse(merged_df3_comp$mutation_info == dr_muts_col, "DM", "OM") @@ -72,53 +88,12 @@ head(df2_af_median) svg(plot_hist_af_muts) print(paste0("plot1 filename:", plot_hist_af_muts)) -#-------------- -# start plot 1 -#-------------- -#par(mar=c(b, l, t, r)) -par(mar=c(5,6,1.3,0)) - -h1 = hist(df3$af - , freq = T - , breaks = 30 - , xlab = "Minor Allele Frequency" - , ylab = "Frequency" - , main = "" - , cex.lab = 1.7 - , cex.axis = 1.5 - , cex.main = 1.5 - , cex.sub = 1.5) - -print(h1) -dev.off() - -#**************** -# Plot 2: AF distribution: samples -#**************** -svg(plot_hist_af_samples) -print(paste0("plot2 filename:", plot_hist_af_samples)) - #-------------- # start plot 1 #-------------- #par(mar=c(b, l, t, r)) par(mar=c(5,6,1,0)) -h2 = hist(df2$af - , freq = T - , breaks = 30 - , xlab = "Minor Allele Frequency" - , ylab = "Frequency" - , main = "" - , cex.lab = 1.7 - , cex.axis = 1.5 - , cex.main = 1.5 - , cex.sub = 1.5) - -print(h2) -dev.off() - -#===================================================================== hist(df3$af , freq = T , breaks = 30 @@ -130,19 +105,127 @@ hist(df3$af , cex.main = 1.5 , cex.sub = 1.5) -hist(df3$af[df3$mutation_info_labels == "DM"] - , col = "#E69F00" - , breaks = 30 - #, add = T +dev.off() + +#**************** +# Plot 2: AF distribution: samples +#**************** +#-------------- +# start plot 2 +#-------------- +#par(mar=c(b, l, t, r)) +par(mar=c(5,6,1,0)) + +hist(df2$af + , freq = T + , breaks = 30 + , xlab = "Minor Allele Frequency" + , ylab = "Frequency" + , main = "" + , cex.lab = 1.7 + , cex.axis = 1.5 + , cex.main = 1.5 + , cex.sub = 1.5) + +#**************** +# Plot 3: OF distribution: mutations +#**************** +svg(plot_hist_or_muts) +print(paste0("plot3 filename:", plot_hist_or_muts)) + +#-------------- +# start plot 3 +#-------------- +#par(mar=c(b, l, t, r)) +par(mar=c(5,6,1,0)) + +hist(df3$or_mychisq + , freq = T + , breaks = 30 + , xlab = "Odds Ratio" + , ylab = "Frequency" + , main = "" + , cex.lab = 1.7 + , cex.axis = 1.5 + , cex.main = 1.5 + , cex.sub = 1.5) + +dev.off() +#==================================================================== +#========== +# combine and output +#========== +#-------------- +# combine: af and or +#------------- +svg(plot_hist_af_or, width = 10, height = 8) +print(paste0("plot3 filename:", plot_hist_af_or)) +#par(bty = "l") +par(mfrow=c(2,1)) +par(mar=c(4.5, 5.5, 2, 0)) +hist(df3$af , freq = T - , xlab = "Minor Allele Frequency" + , breaks = 30 + , xlab = "Minor Allele Frequency (MAF)" , ylab = "Frequency" , main = "" - , cex.lab = 1.7 - , cex.axis = 1.5 + , cex.lab = 1.3 + , cex.axis = 1.3 , cex.main = 1.5 , cex.sub = 1.5) -###################################################################################### + +# print the overall labels +mtext(expression(bold('(a)')), side = 3, adj = -0.1, cex = 1.8) + +hist(df3$or_mychisq + , freq = T + , breaks = 30 + , xlab = "Odds Ratio (OR)" + , ylab = "Frequency" + , main = "" + , cex.lab = 1.3 + , cex.axis = 1.3 + , cex.main = 1.5 + , cex.sub = 1.5) + +# print the overall labels +mtext(expression(bold('(b)')), side = 3, adj = -0.1, cex = 1.8) +dev.off() + + +#-------------- +# combine: af (mutations and samples) +#------------- +svg(plot_hist_af_muts_sample, width = 10, height = 8) +print(paste0("plot3 filename:", plot_hist_af_muts_sample)) +#par(bty = "l") +par(mfrow = c(1,2)) +par(mar=c(4.5, 5.5, 2, 0)) +hist(df3$af + , freq = T + , breaks = 30 + , xlab = "Minor Allele Frequency (MAF)" + , ylab = "Frequency" + , main = paste0(nrow(df3),"_pnca_mutations") + , cex.lab = 1.3 + , cex.axis = 1.3 + , cex.main = 1.5 + , cex.sub = 1.5) + +hist(df2$af + , freq = T + , breaks = 30 + , xlab = "Minor Allele Frequency (MAF)" + , ylab = "Frequency" + , main = paste0(nrow(df2),"_pnca_samples") + , cex.lab = 1.3 + , cex.axis = 1.3 + , cex.main = 1.5 + , cex.sub = 1.5) +dev.off() + + +######################################################## ############# # ggplots ############# @@ -208,13 +291,16 @@ g_af_mutinfo = ggplot(df3, aes(x = af #, axis.title.y = element_blank() , axis.title.y = element_text(size = my_ats) , axis.ticks.y = element_blank() - , plot.title = element_blank() - , strip.text = element_text(size = my_als) + , plot.title = element_text(size = my_ats+5, face ="bold", hjust = 0.5) + #, strip.text = element_text(size = my_als) + , strip.text = element_blank() + , strip.background = element_blank() , legend.text = element_text(size = my_als-4) , legend.title = element_text(size = my_als-4) , legend.position = c(0.8, 0.9)) + - labs(x = "Minor Allele Frequency (MAF)" + labs(title = "Minor Allele Frequency (MAF)" + , x = "MAF" , y = "Count" , fill = "Mutation class") @@ -224,6 +310,8 @@ g_af_mutinfo_med = g_af_mutinfo + geom_vline(data = df3_af_median, aes(xintercep g_af_mutinfo_med #===================================================================== +my_comparisons <- list( c("DM", "OM") ) + g_af_bp = ggplot(df3, aes(x = mutation_info_labels , y = af , fill = mutation_info_labels))+ @@ -239,27 +327,17 @@ g_af_bp = ggplot(df3, aes(x = mutation_info_labels , strip.text = element_text(size = my_als) , legend.text = element_text(size = my_als-4) , legend.title = element_text(size = my_als-4) - , legend.position = c(0.8, 0.9)) + - labs(y = "Minor Allele Frequency (MAF)" + , legend.position = "none") + + labs(y = "MAF" , x = "" , fill = "Mutation class") -g_af_bp -#===================================================================== -################### -# combine: afs -################### -library(cowplot) -grid.arrange(g_af_hist, g_af_mutinfo, g_af_bp) - -c2 = cowplot::plot_grid(g_mutinfo, g_bp - , nrow = 2 - , labels = c("(a)", "(b)") - , rel_widths = c(1.5/2, 0.25/2) - , label_size = 20) - -print(c2) - +g_af_bp_stats = g_af_bp + stat_compare_means(comparisons = my_comparisons + , method = "wilcox.test" + , paired = FALSE + #, label = "p.format" + , label = "p.signif") +g_af_bp_stats ###################################################################### # OR ###################################################################### @@ -281,17 +359,20 @@ g_or_mutinfo = ggplot(df3, aes(x = or_mychisq , axis.title.y = element_text(size = my_ats) #, axis.title.y = element_blank() , axis.ticks.y = element_blank() - , plot.title = element_blank() - , strip.text = element_text(size = my_als) + , plot.title = element_text(size = my_ats+5, face ="bold", hjust = 0.5) + #, strip.text = element_text(size = my_als) + , strip.text = element_blank() + , strip.background = element_blank() , legend.text = element_text(size = my_als-4) , legend.title = element_text(size = my_als-4) - , legend.position = c(0.8, 0.9)) + - labs(x = "Odds Ratio" + , legend.position = c(0.8, 0.9))+ + labs(title = "Odds Ratio (OR)" + , x = "OR" , y = "Count" , fill = "Mutation class") g_or_mutinfo -g_or_mutinfo_med = g_or + geom_vline(data = df3_or_median, aes(xintercept = grp.median), +g_or_mutinfo_med = g_or_mutinfo + geom_vline(data = df3_or_median, aes(xintercept = grp.median), linetype = "dashed") g_or_mutinfo_med #===================================================================== @@ -310,69 +391,57 @@ g_or_bp = ggplot(df3, aes(x = mutation_info_labels , strip.text = element_text(size = my_als) , legend.text = element_text(size = my_als-4) , legend.title = element_text(size = my_als-4) - , legend.position = c(0.8, 0.9)) + - labs(y = "Odds Ratio" + , legend.position = "none") + + labs(y = "OR" , x = "" , fill = "Mutation class") -g_or_bp - -################### -# combine: afs -################### -library(cowplot) -c_or = cowplot::plot_grid(g_or_mutinfo, g_or_bp - , nrow = 2 - , labels = c("(a)", "(b)") - , rel_widths = c(1.5/2, 0.25/2) - , label_size = 20) - -print(c_or) - - +g_or_bp_stats = g_or_bp + stat_compare_means(comparisons = my_comparisons + , method = "wilcox.test" + , paired = FALSE + #, label = "p.format" + , label = "p.signif") +g_or_bp_stats +############################################################################ +#============================== +# combine plots for outputs +#============================== +#------------------------------------ +# Plot 1: hist withOUT median line +#------------------------------------ +# combined plots without median +#print(paste0("plot combined filename:", plot_af_or_combined)) +#svg(plot_af_or_combined, width = 16, height = 9) c_combined = cowplot::plot_grid(g_af_mutinfo - , g_af_bp + , g_af_bp_stats , g_or_mutinfo - , g_or_bp + , g_or_bp_stats , nrow = 2 , labels = c("(a)", "(b)", "(c)", "(d)") , rel_widths = c(2/3, 1/3) , label_size = 20) -print(c_combined) +#print(c_combined) +#dev.off() - -c_combined2 = cowplot::plot_grid(g_af_mutinfo - , g_or_mutinfo - , g_af_bp - , g_or_bp +#------------------------------- +# Plot 2: hist WITH median line +#------------------------------- +print(paste0("plot combined filename:", plot_af_or_combined_med)) +svg(plot_af_or_combined_med, width = 16, height = 9) +c_combined_med = cowplot::plot_grid(g_af_mutinfo_med + , g_af_bp_stats + , g_or_mutinfo_med + , g_or_bp_stats , nrow = 2 , labels = c("(a)", "(b)", "(c)", "(d)") - #, rel_widths = c(1.5/2, 0.25/2) + , rel_widths = c(3/4, 1/4) , label_size = 20) -#print(c_combined2) - - - +print(c_combined_med) +dev.off() ###################################################################### -######################################################################## -# end of hist AF -######################################################################## -par(mfrow=c(1,2)) - -hist(df2$af - , xlab = "Minor Allele Frequency" - , ylab = "Frequency" - , main = paste0(nrow(df2),"_pnca_samples")) - - -hist(df3$af - , freq = T - , xlab = "Minor Allele Frequency" - , ylab = "Frequency" - , main = paste0(nrow(df3),"_pnca_mutations"))