#!/usr/bin/env Rscript ######################################################### # TASK: producing histogram of AF and OR # output svgs # 1) hist of af # 2) hist of or # 3) hist of af_or combined # 4) hist of af: from muts and samples dfs combined (EXPLORATORY!) # 5) hist and barplots for af_or combined with median line # 6) hist and barplots for af_or combined withOUT median line (Turned OFF) ######################################################### #======================================================================= # working dir and loading libraries getwd() setwd("~/git/LSHTM_analysis/scripts/plotting") getwd() #source("Header_TT.R") library(ggplot2) library(data.table) library(dplyr) library(plyr) #source("plotting_data.R") source("combining_dfs_plotting.R") #======================================================================= #======= # output #======= # plot: with median line on hist af_or_combined_med = "hist_bp_muts_combined_median_labelled.svg" plot_af_or_combined_med = paste0(plotdir, "/", af_or_combined_med) # plot 6: without median line on hist af_or_combined = "hist_bp_muts_combined.svg" plot_af_or_combined = paste0(plotdir, "/", af_or_combined) #======================================================================= merged_df3_comp$mutation_info_labels = ifelse(merged_df3_comp$mutation_info == dr_muts_col, "DM", "OM") table(merged_df3_comp$mutation_info_labels) table(merged_df3_comp$mutation_info) == table(merged_df3_comp$mutation_info_labels) sum(table(merged_df3_comp$mutation_info)) merged_df2_comp$mutation_info_labels = ifelse(merged_df2_comp$mutation_info == dr_muts_col, "DM", "OM") table(merged_df2_comp$mutation_info_labels) table(merged_df2_comp$mutation_info) == table(merged_df2_comp$mutation_info_labels) sum(table(merged_df2_comp$mutation_info)) #================ # Data for plots #================ # REASSIGNMENT as necessary #df = my_df_u df3 = merged_df3_comp # Contains duplicated samples, so you need to remove that for AF hist merged_df2_comp_u = merged_df2_comp[!duplicated(merged_df2_comp$id),] if ( nrow(merged_df2_comp_u) == length(unique(merged_df2_comp$id)) ){ cat("PASS: duplicated samples ommitted. Assiging for plotting...") df2 = merged_df2_comp_u }else{ cat("FAIL: duplicated samples could not be ommitted. Length mismatch" , "Expected nrows:", length(unique(merged_df2_comp$id)) , "Got nrows:", nrow(merged_df2_comp_u)) } df2_af_median <- ddply(df2, "mutation_info_labels", summarise, grp.median = median(af, na.rm = T)) head(df2_af_median) ######################################################## ############# # ggplots ############# my_ats = 15 # axis text size my_als = 18 # axis label size #theme_set(theme_grey()) #----------- # AF: hist #----------- g_af_hist = ggplot(df3, aes(x = af)) + geom_histogram(colour = "white") + theme(axis.text.x = element_text(size = my_ats) , axis.text.y = element_text(size = my_ats) #, axis.title.y = element_blank() , axis.title.x = element_text(size = my_ats) #, axis.title.x = element_blank() , axis.title.y = element_text(size = my_ats) , axis.ticks.y = element_blank() , plot.title = element_blank())+ #, plot.title = element_text(size = my_ats+5, face ="bold", hjust = 0.5))+ labs(title = "Minor Allele Frequency (MAF)" , x = "MAF" , y = "Count") g_af_hist #===================================================================== #------------------------ # AF: hist coloured by # mutation class in the # same graph #--------------------- #library(plyr) df3_af_median <- ddply(df3, "mutation_info_labels", summarise, grp.median = median(af, na.rm = T)) head(df3_af_median) g_af_hist_col = ggplot(df3, aes(x = af, fill = mutation_info_labels)) + geom_histogram(position = "stack") + scale_fill_manual(values = c("#E69F00", "#999999")) + theme(axis.text.x = element_text(size = my_ats) , axis.text.y = element_text(size = my_ats) #, axis.title.y = element_blank() , axis.title.x = element_text(size = my_ats) #, axis.title.x = element_blank() , axis.title.y = element_text(size = my_ats) , axis.ticks.y = element_blank() , plot.title = element_blank())+ labs(y = "Count" , x = "Minor Allele Frequency" ) g_af_hist_col g_af_hist_col_med = g_af_hist_col + geom_vline(data = df3_af_median, aes(xintercept = grp.median),linetype = "dashed") g_af_hist_col_med #===================================================================== #--------------------------------- # AF: hist facet by mutation_class #--------------------------------- g_af_mutinfo = ggplot(df3, aes(x = af , fill = mutation_info_labels)) + scale_fill_manual(values = c("#E69F00", "#999999")) + geom_histogram() + facet_grid(mutation_info_labels ~ ., scales = "free") + #facet_wrap(mutation_info_labels ~ ., scales = "free") + theme(axis.text.x = element_text(size = my_ats) , axis.text.y = element_text(size = my_ats) , axis.title.x = element_text(size = my_ats) #, axis.title.y = element_blank() , axis.title.y = element_text(size = my_ats) , axis.ticks.y = element_blank() #, plot.title = element_text(size = my_ats+5, face ="bold", hjust = 0.5) , plot.title = element_blank() #, strip.text = element_text(size = my_als) , strip.text = element_blank() , strip.background = element_blank() , legend.text = element_text(size = my_als-4) , legend.title = element_text(size = my_als-4) , legend.position = c(0.8, 0.9)) + labs(title = "Minor Allele Frequency (MAF)" , x = "MAF" , y = "Count" , fill = "Mutation class") g_af_mutinfo g_af_mutinfo_med = g_af_mutinfo + geom_vline(data = df3_af_median, aes(xintercept = grp.median),linetype = "dashed") g_af_mutinfo_med #===================================================================== #------------------------ # AF: boxplot with stats #------------------------ my_comparisons <- list( c("DM", "OM") ) g_af_bp = ggplot(df3, aes(x = mutation_info_labels , y = af , fill = mutation_info_labels))+ scale_fill_manual(values = c("#E69F00", "#999999")) + geom_boxplot() + theme(axis.text.x = element_text(size = my_ats) , axis.text.y = element_text(size = my_ats) , axis.title.x = element_text(size = my_ats) #, axis.title.y = element_blank() , axis.title.y = element_text(size = my_ats) , axis.ticks.y = element_blank() , plot.title = element_blank() , strip.text = element_text(size = my_als) , legend.text = element_text(size = my_als-4) , legend.title = element_text(size = my_als-4) , legend.position = "none") + labs(y = "MAF" , x = "" , fill = "Mutation class") g_af_bp_stats = g_af_bp + stat_compare_means(comparisons = my_comparisons , method = "wilcox.test" , paired = FALSE #, label = "p.format" , label = "p.signif") g_af_bp_stats ###################################################################### # OR plots ###################################################################### #----------- # OR: hist #----------- g_or_hist = ggplot(df3, aes(x = or_mychisq)) + geom_histogram(colour = "white") + theme(axis.text.x = element_text(size = my_ats) , axis.text.y = element_text(size = my_ats) #, axis.title.y = element_blank() , axis.title.x = element_text(size = my_ats) #, axis.title.x = element_blank() , axis.title.y = element_text(size = my_ats) , axis.ticks.y = element_blank() , plot.title = element_blank())+ #, plot.title = element_text(size = my_ats+5, face ="bold", hjust = 0.5))+ labs(title = "Odds Ratio (OR)" , x = "OR" , y = "Count") g_or_hist #===================================================================== #--------------------------------- # OR: hist facet by mutation_class #--------------------------------- df3_or_median <- ddply(df3, "mutation_info_labels", summarise, grp.median = median(or_mychisq, na.rm = T)) head(df3_or_median) g_or_mutinfo = ggplot(df3, aes(x = or_mychisq , fill = mutation_info_labels)) + scale_fill_manual(values = c("#E69F00", "#999999")) + geom_histogram() + facet_grid(mutation_info_labels ~ ., scales = "free") + #facet_wrap(mutation_info_labels ~ ., scales = "free") + scale_y_continuous(breaks = c(0, 15, 30, 45, 60, 75)) + theme(axis.text.x = element_text(size = my_ats) , axis.text.y = element_text(size = my_ats) , axis.title.x = element_text(size = my_ats) , axis.title.y = element_text(size = my_ats) #, axis.title.y = element_blank() , axis.ticks.y = element_blank() #, plot.title = element_text(size = my_ats+5, face ="bold", hjust = 0.5) , plot.title = element_blank() #, strip.text = element_text(size = my_als) , strip.text = element_blank() , strip.background = element_blank() , legend.text = element_text(size = my_als-4) , legend.title = element_text(size = my_als-4) , legend.position = c(0.8, 0.9))+ labs(title = "Odds Ratio (OR)" , x = "OR" , y = "Count" , fill = "Mutation class") g_or_mutinfo g_or_mutinfo_med = g_or_mutinfo + geom_vline(data = df3_or_median, aes(xintercept = grp.median), linetype = "dashed") g_or_mutinfo_med #===================================================================== #------------------------ # OR: boxplot with stats #------------------------ g_or_bp = ggplot(df3, aes(x = mutation_info_labels , y = or_mychisq , fill = mutation_info_labels))+ scale_fill_manual(values = c("#E69F00", "#999999")) + geom_boxplot()+ theme(axis.text.x = element_text(size = my_ats) , axis.text.y = element_text(size = my_ats) , axis.title.x = element_text(size = my_ats) #, axis.title.y = element_blank() , axis.title.y = element_text(size = my_ats) , axis.ticks.y = element_blank() , plot.title = element_blank() , strip.text = element_text(size = my_als) , legend.text = element_text(size = my_als-4) , legend.title = element_text(size = my_als-4) , legend.position = "none") + labs(y = "OR" , x = "" , fill = "Mutation class") g_or_bp_stats = g_or_bp + stat_compare_means(comparisons = my_comparisons , method = "wilcox.test" , paired = FALSE #, label = "p.format" , label = "p.signif") g_or_bp_stats ############################################################################ #============================== # combine plots for outputs #============================== #----------------------------------------- # combine the AF plots with overall title #----------------------------------------- p1_af = cowplot::plot_grid(g_af_hist , g_af_mutinfo_med #, g_af_mutinfo # without median line , g_af_bp_stats , nrow = 1 #, labels = c("(a)", "(b)", "(c)") , labels = c("A", "B", "C") , hjust = -1.5 , vjust = 0.2 , rel_widths = c(1.2/4, 2/4, 0.8/4) , label_size = 18) p1_af title_af = ggdraw() + draw_label("Minor Allele Frequency (MAF)", fontface='bold', size = my_ats + 5) p1_af_title = plot_grid(title_af, p1_af, ncol = 1, rel_heights = c(0.1, 1)) # rel_heights values control title margins p1_af_title #----------------------------------------- # combine the OR plots with overall title #----------------------------------------- p2_or = cowplot::plot_grid(g_or_hist , g_or_mutinfo_med #, g_or_mutinfo # without median line , g_or_bp_stats , nrow = 1 #, labels = c("(d)", "(e)", "(f)") , labels = c("D", "E", "F") , hjust = -1.5 , vjust = 0.2 , rel_widths = c(1.2/4, 2/4, 0.8/4) , label_size = 18) p2_or title_or = ggdraw() + draw_label("Odds Ratio (OR)", fontface = 'bold', size = my_ats + 5) p2_or_title = plot_grid(title_or, p2_or, ncol = 1, rel_heights = c(0.1, 1)) # rel_heights values control title margins p2_or_title #---------------------------------------- # Plot 1: Combined AF and OR hist plots # with median line on hist #---------------------------------------- print(paste0("plot combined filename:", plot_af_or_combined_med)) svg(plot_af_or_combined_med, width = 15, height = 9) p_combined_med = cowplot::plot_grid(p1_af_title, p2_or_title, nrow = 2) p_combined_med dev.off() #---------------------------------------- # Plot 2: Combined AF and OR hist plots # with median line on hist #---------------------------------------- # uncomment the correct vars in p1_af and p2_or to generate this #print(paste0("plot combined filename:", plot_af_or_combined)) #svg(plot_af_or_combined, width = 15, height = 9) #p_combined = cowplot::plot_grid(p1_af_title, p2_or_title, nrow = 2) #p_combined #dev.off() ######################################################################