#!/usr/bin/env Rscript ######################################################### # TASK: producing histogram of AF # basic # basic ######################################################### #======================================================================= # working dir and loading libraries getwd() setwd("~/git/LSHTM_analysis/scripts/plotting") getwd() #source("Header_TT.R") library(ggplot2) library(data.table) library(dplyr) library(plyr) #source("plotting_data.R") source("combining_dfs_plotting.R") # clear excess variable rm(my_df, upos, dup_muts, my_df_u_lig) #======================================================================= #======= # output #======= # plot 1 hist_af_muts = "hist_mutations_AF.svg" plot_hist_af_muts = paste0(plotdir,"/", hist_af_muts) # plot 2 hist_or_muts = "hist_mutations_OR.svg" plot_hist_or_muts = paste0(plotdir,"/", hist_or_muts) # plot 3 hist_af_muts_sample = "hist_af_muts_sample_combined.svg" plot_hist_af_muts_sample = paste0(plotdir,"/", hist_af_muts_sample) # plot 4 hist_af_or = "hist_af_or_combined.svg" plot_hist_af_or = paste0(plotdir,"/", hist_af_or) # plot 5 af_or_combined_med = "hist_bp_muts_combined_median.svg" plot_af_or_combined_med = paste0(plotdir, "/", af_or_combined_med) # plot 6: without median line on hist af_or_combined = "hist_bp_muts_combined.svg" plot_af_or_combined = paste0(plotdir, "/", af_or_combined) #======================================================================= merged_df3_comp$mutation_info_labels = ifelse(merged_df3_comp$mutation_info == dr_muts_col, "DM", "OM") table(merged_df3_comp$mutation_info_labels) table(merged_df3_comp$mutation_info) == table(merged_df3_comp$mutation_info_labels) merged_df2_comp$mutation_info_labels = ifelse(merged_df2_comp$mutation_info == dr_muts_col, "DM", "OM") table(merged_df2_comp$mutation_info_labels) table(merged_df2_comp$mutation_info) == table(merged_df2_comp$mutation_info_labels) #================ # Data for plots #================ # REASSIGNMENT as necessary df = my_df_u df3 = merged_df3_comp # Contains duplicated samples, so you need to remove that for AF hist merged_df2_comp_u = merged_df2_comp[!duplicated(merged_df2_comp$id),] if ( nrow(merged_df2_comp_u) == length(unique(merged_df2_comp$id)) ){ cat("PASS: duplicated samples ommitted. Assiging for plotting...") df2 = merged_df2_comp_u }else{ cat("FAIL: duplicated samples could not be ommitted. Length mismatch" , "Expected nrows:", length(unique(merged_df2_comp$id)) , "Got nrows:", nrow(merged_df2_comp_u)) } df2_af_median <- ddply(df2, "mutation_info_labels", summarise, grp.median = median(af, na.rm = T)) head(df2_af_median) #======================================================================= #**************** # Plot 1: AF distribution: mutations #**************** svg(plot_hist_af_muts) print(paste0("plot1 filename:", plot_hist_af_muts)) #-------------- # start plot 1 #-------------- #par(mar=c(b, l, t, r)) par(mar=c(5,6,1,0)) hist(df3$af , freq = T , breaks = 30 , xlab = "Minor Allele Frequency" , ylab = "Frequency" , main = "" , cex.lab = 1.7 , cex.axis = 1.5 , cex.main = 1.5 , cex.sub = 1.5) dev.off() #**************** # Plot 2: AF distribution: samples #**************** #-------------- # start plot 2 #-------------- #par(mar=c(b, l, t, r)) par(mar=c(5,6,1,0)) hist(df2$af , freq = T , breaks = 30 , xlab = "Minor Allele Frequency" , ylab = "Frequency" , main = "" , cex.lab = 1.7 , cex.axis = 1.5 , cex.main = 1.5 , cex.sub = 1.5) #**************** # Plot 3: OF distribution: mutations #**************** svg(plot_hist_or_muts) print(paste0("plot3 filename:", plot_hist_or_muts)) #-------------- # start plot 3 #-------------- #par(mar=c(b, l, t, r)) par(mar=c(5,6,1,0)) hist(df3$or_mychisq , freq = T , breaks = 30 , xlab = "Odds Ratio" , ylab = "Frequency" , main = "" , cex.lab = 1.7 , cex.axis = 1.5 , cex.main = 1.5 , cex.sub = 1.5) dev.off() #==================================================================== #========== # combine and output #========== #-------------- # combine: af and or #------------- svg(plot_hist_af_or, width = 10, height = 8) print(paste0("plot3 filename:", plot_hist_af_or)) #par(bty = "l") par(mfrow=c(2,1)) par(mar=c(4.5, 5.5, 2, 0)) hist(df3$af , freq = T , breaks = 30 , xlab = "Minor Allele Frequency (MAF)" , ylab = "Frequency" , main = "" , cex.lab = 1.3 , cex.axis = 1.3 , cex.main = 1.5 , cex.sub = 1.5) # print the overall labels mtext(expression(bold('(a)')), side = 3, adj = -0.1, cex = 1.8) hist(df3$or_mychisq , freq = T , breaks = 30 , xlab = "Odds Ratio (OR)" , ylab = "Frequency" , main = "" , cex.lab = 1.3 , cex.axis = 1.3 , cex.main = 1.5 , cex.sub = 1.5) # print the overall labels mtext(expression(bold('(b)')), side = 3, adj = -0.1, cex = 1.8) dev.off() #-------------- # combine: af (mutations and samples) #------------- svg(plot_hist_af_muts_sample, width = 10, height = 8) print(paste0("plot3 filename:", plot_hist_af_muts_sample)) #par(bty = "l") par(mfrow = c(1,2)) par(mar=c(4.5, 5.5, 2, 0)) hist(df3$af , freq = T , breaks = 30 , xlab = "Minor Allele Frequency (MAF)" , ylab = "Frequency" , main = paste0(nrow(df3),"_pnca_mutations") , cex.lab = 1.3 , cex.axis = 1.3 , cex.main = 1.5 , cex.sub = 1.5) hist(df2$af , freq = T , breaks = 30 , xlab = "Minor Allele Frequency (MAF)" , ylab = "Frequency" , main = paste0(nrow(df2),"_pnca_samples") , cex.lab = 1.3 , cex.axis = 1.3 , cex.main = 1.5 , cex.sub = 1.5) dev.off() ######################################################## ############# # ggplots ############# #library(plyr) df3_af_median <- ddply(df3, "mutation_info_labels", summarise, grp.median = median(af, na.rm = T)) head(df3_af_median) my_ats = 15 # axis text size my_als = 18 # axis label size theme_set(theme_grey()) g_af_hist = ggplot(df3, aes(x = af)) + geom_histogram(colour = "white") + theme(axis.text.x = element_text(size = my_ats) , axis.text.y = element_text(size = my_ats) #, axis.title.y = element_blank() , axis.title.x = element_text(size = my_ats) #, axis.title.x = element_blank() , axis.title.y = element_text(size = my_ats) , axis.ticks.y = element_blank() , plot.title = element_blank())+ labs(y = "Count" , x = "Minor Allele Frequency" ) g_af_hist #===================================================================== df3_af_median <- ddply(df3, "mutation_info_labels", summarise, grp.median = median(af, na.rm = T)) head(df3_af_median) g_af_hist_col = ggplot(df3, aes(x = af, fill = mutation_info_labels)) + geom_histogram(position = "stack") + scale_fill_manual(values = c("#E69F00", "#999999")) + theme(axis.text.x = element_text(size = my_ats) , axis.text.y = element_text(size = my_ats) #, axis.title.y = element_blank() , axis.title.x = element_text(size = my_ats) #, axis.title.x = element_blank() , axis.title.y = element_text(size = my_ats) , axis.ticks.y = element_blank() , plot.title = element_blank())+ labs(y = "Count" , x = "Minor Allele Frequency" ) g_af_hist_col g_af_hist_col_med = g_af_hist_col + geom_vline(data = df3_af_median, aes(xintercept = grp.median), linetype = "dashed") g_af_hist_col_med #===================================================================== g_af_mutinfo = ggplot(df3, aes(x = af , fill = mutation_info_labels)) + scale_fill_manual(values = c("#E69F00", "#999999")) + geom_histogram() + facet_grid(mutation_info_labels ~ ., scales = "free") + #facet_wrap(mutation_info_labels ~ ., scales = "free") + theme(axis.text.x = element_text(size = my_ats) , axis.text.y = element_text(size = my_ats) , axis.title.x = element_text(size = my_ats) #, axis.title.y = element_blank() , axis.title.y = element_text(size = my_ats) , axis.ticks.y = element_blank() , plot.title = element_text(size = my_ats+5, face ="bold", hjust = 0.5) #, strip.text = element_text(size = my_als) , strip.text = element_blank() , strip.background = element_blank() , legend.text = element_text(size = my_als-4) , legend.title = element_text(size = my_als-4) , legend.position = c(0.8, 0.9)) + labs(title = "Minor Allele Frequency (MAF)" , x = "MAF" , y = "Count" , fill = "Mutation class") g_af_mutinfo g_af_mutinfo_med = g_af_mutinfo + geom_vline(data = df3_af_median, aes(xintercept = grp.median), linetype = "dashed") g_af_mutinfo_med #===================================================================== my_comparisons <- list( c("DM", "OM") ) g_af_bp = ggplot(df3, aes(x = mutation_info_labels , y = af , fill = mutation_info_labels))+ scale_fill_manual(values = c("#E69F00", "#999999")) + geom_boxplot()+ theme(axis.text.x = element_text(size = my_ats) , axis.text.y = element_text(size = my_ats) , axis.title.x = element_text(size = my_ats) #, axis.title.y = element_blank() , axis.title.y = element_text(size = my_ats) , axis.ticks.y = element_blank() , plot.title = element_blank() , strip.text = element_text(size = my_als) , legend.text = element_text(size = my_als-4) , legend.title = element_text(size = my_als-4) , legend.position = "none") + labs(y = "MAF" , x = "" , fill = "Mutation class") g_af_bp_stats = g_af_bp + stat_compare_means(comparisons = my_comparisons , method = "wilcox.test" , paired = FALSE #, label = "p.format" , label = "p.signif") g_af_bp_stats ###################################################################### # OR ###################################################################### df3_or_median <- ddply(df3, "mutation_info_labels", summarise, grp.median = median(or_mychisq, na.rm = T)) head(df3_or_median) g_or_mutinfo = ggplot(df3, aes(x = or_mychisq , fill = mutation_info_labels)) + scale_fill_manual(values = c("#E69F00", "#999999")) + geom_histogram() + facet_grid(mutation_info_labels ~ ., scales = "free") + #facet_wrap(mutation_info_labels ~ ., scales = "free") + theme(axis.text.x = element_text(size = my_ats) , axis.text.y = element_text(size = my_ats) , axis.title.x = element_text(size = my_ats) , axis.title.y = element_text(size = my_ats) #, axis.title.y = element_blank() , axis.ticks.y = element_blank() , plot.title = element_text(size = my_ats+5, face ="bold", hjust = 0.5) #, strip.text = element_text(size = my_als) , strip.text = element_blank() , strip.background = element_blank() , legend.text = element_text(size = my_als-4) , legend.title = element_text(size = my_als-4) , legend.position = c(0.8, 0.9))+ labs(title = "Odds Ratio (OR)" , x = "OR" , y = "Count" , fill = "Mutation class") g_or_mutinfo g_or_mutinfo_med = g_or_mutinfo + geom_vline(data = df3_or_median, aes(xintercept = grp.median), linetype = "dashed") g_or_mutinfo_med #===================================================================== g_or_bp = ggplot(df3, aes(x = mutation_info_labels , y = or_mychisq , fill = mutation_info_labels))+ scale_fill_manual(values = c("#E69F00", "#999999")) + geom_boxplot()+ theme(axis.text.x = element_text(size = my_ats) , axis.text.y = element_text(size = my_ats) , axis.title.x = element_text(size = my_ats) #, axis.title.y = element_blank() , axis.title.y = element_text(size = my_ats) , axis.ticks.y = element_blank() , plot.title = element_blank() , strip.text = element_text(size = my_als) , legend.text = element_text(size = my_als-4) , legend.title = element_text(size = my_als-4) , legend.position = "none") + labs(y = "OR" , x = "" , fill = "Mutation class") g_or_bp_stats = g_or_bp + stat_compare_means(comparisons = my_comparisons , method = "wilcox.test" , paired = FALSE #, label = "p.format" , label = "p.signif") g_or_bp_stats ############################################################################ #============================== # combine plots for outputs #============================== #------------------------------------ # Plot 1: hist withOUT median line #------------------------------------ # combined plots without median #print(paste0("plot combined filename:", plot_af_or_combined)) #svg(plot_af_or_combined, width = 16, height = 9) c_combined = cowplot::plot_grid(g_af_mutinfo , g_af_bp_stats , g_or_mutinfo , g_or_bp_stats , nrow = 2 , labels = c("(a)", "(b)", "(c)", "(d)") , rel_widths = c(2/3, 1/3) , label_size = 20) #print(c_combined) #dev.off() #------------------------------- # Plot 2: hist WITH median line #------------------------------- print(paste0("plot combined filename:", plot_af_or_combined_med)) svg(plot_af_or_combined_med, width = 16, height = 9) c_combined_med = cowplot::plot_grid(g_af_mutinfo_med , g_af_bp_stats , g_or_mutinfo_med , g_or_bp_stats , nrow = 2 , labels = c("(a)", "(b)", "(c)", "(d)") , rel_widths = c(3/4, 1/4) , label_size = 20) print(c_combined_med) dev.off() ######################################################################