#!/usr/bin/env Rscript ######################################################### # TASK: producing histogram of AF and OR # output svgs # 1) hist of af # 2) hist of or # 3) hist of af_or combined # 4) hist of af: from muts and samples dfs combined (EXPLORATORY!) # 5) hist and barplots for af_or combined with median line # 6) hist and barplots for af_or combined withOUT median line (Turned OFF) ######################################################### #======================================================================= # working dir and loading libraries getwd() setwd("~/git/LSHTM_analysis/scripts/plotting") getwd() #source("Header_TT.R") library(ggplot2) library(data.table) library(dplyr) library(plyr) #source("plotting_data.R") source("combining_dfs_plotting.R") #======================================================================= #======= # output #======= # plot 1 hist_af_muts = "hist_mutations_AF.svg" plot_hist_af_muts = paste0(plotdir,"/", hist_af_muts) # plot 2 hist_or_muts = "hist_mutations_OR.svg" plot_hist_or_muts = paste0(plotdir,"/", hist_or_muts) # plot 3 hist_af_muts_sample = "hist_af_muts_sample_combined.svg" plot_hist_af_muts_sample = paste0(plotdir,"/", hist_af_muts_sample) # plot 4 hist_af_or = "hist_af_or_combined.svg" plot_hist_af_or = paste0(plotdir,"/", hist_af_or) # plot 5 af_or_combined_med = "hist_bp_muts_combined_median.svg" plot_af_or_combined_med = paste0(plotdir, "/", af_or_combined_med) # plot 6: without median line on hist af_or_combined = "hist_bp_muts_combined.svg" plot_af_or_combined = paste0(plotdir, "/", af_or_combined) #======================================================================= merged_df3_comp$mutation_info_labels = ifelse(merged_df3_comp$mutation_info == dr_muts_col, "DM", "OM") table(merged_df3_comp$mutation_info_labels) table(merged_df3_comp$mutation_info) == table(merged_df3_comp$mutation_info_labels) sum(table(merged_df3_comp$mutation_info)) merged_df2_comp$mutation_info_labels = ifelse(merged_df2_comp$mutation_info == dr_muts_col, "DM", "OM") table(merged_df2_comp$mutation_info_labels) table(merged_df2_comp$mutation_info) == table(merged_df2_comp$mutation_info_labels) sum(table(merged_df2_comp$mutation_info)) #================ # Data for plots #================ # REASSIGNMENT as necessary #df = my_df_u df3 = merged_df3_comp # Contains duplicated samples, so you need to remove that for AF hist merged_df2_comp_u = merged_df2_comp[!duplicated(merged_df2_comp$id),] if ( nrow(merged_df2_comp_u) == length(unique(merged_df2_comp$id)) ){ cat("PASS: duplicated samples ommitted. Assiging for plotting...") df2 = merged_df2_comp_u }else{ cat("FAIL: duplicated samples could not be ommitted. Length mismatch" , "Expected nrows:", length(unique(merged_df2_comp$id)) , "Got nrows:", nrow(merged_df2_comp_u)) } #======================================================================= #**************** # Plot 1: AF distribution: mutations #**************** svg(plot_hist_af_muts) print(paste0("plot1 filename:", plot_hist_af_muts)) #par(mar=c(b, l, t, r)) par(mar = c(4.5, 6, 1.8, 0)) hist(df3$af , freq = T , breaks = 30 , xlab = "MAF" , ylab = "Frequency" , main = "Minor Allele Frequency (MAF)" , cex.lab = 1.7 , cex.axis = 1.5 , cex.main = 1.5 , cex.sub = 1.5) dev.off() #**************** # Plot 2: AF distribution: samples #**************** par(mar = c(4.5, 6, 1.8, 0)) hist(df2$af , freq = T , breaks = 30 , xlab = "MAF" , ylab = "Frequency" , main = "Minor Allele Frequency (MAF)" , cex.lab = 1.7 , cex.lab = 1.7 , cex.axis = 1.5 , cex.main = 1.5 , cex.sub = 1.5) #**************** # Plot 3: OF distribution: mutations #**************** svg(plot_hist_or_muts) print(paste0("plot3 filename:", plot_hist_or_muts)) par(mar = c(4.5, 6, 1.8, 0)) hist(df3$or_mychisq , freq = T , breaks = 30 , xlab = "OR" , ylab = "Frequency" , main = "Odds Ratio (OR)" , cex.lab = 1.7 , cex.axis = 1.5 , cex.main = 1.5 , cex.sub = 1.5) dev.off() #==================================================================== #========== # combine and output #========== #-------------- # combine: af and or #------------- svg(plot_hist_af_or, width = 10, height = 8) print(paste0("plot3 filename:", plot_hist_af_or)) par(mfrow = c(1,2)) par(mar = c(4.5, 5.5, 2, 0)) hist(df3$af , freq = T , breaks = 30 , xlab = "Minor Allele Frequency (MAF)" , ylab = "Frequency" , main = "" , cex.lab = 1.3 , cex.axis = 1.3 , cex.main = 1.5 , cex.sub = 1.5) # print the overall labels mtext(expression(bold('(a)')), side = 3, adj = -0.1, cex = 1.8) hist(df3$or_mychisq , freq = T , breaks = 30 , xlab = "Odds Ratio (OR)" , ylab = "Frequency" , main = "" , cex.lab = 1.3 , cex.axis = 1.3 , cex.main = 1.5 , cex.sub = 1.5) # print the overall labels mtext(expression(bold('(b)')), side = 3, adj = -0.1, cex = 1.8) dev.off() #-------------- # combine: af (mutations and samples) #------------- svg(plot_hist_af_muts_sample, width = 10, height = 8) print(paste0("plot3 filename:", plot_hist_af_muts_sample)) par(mfrow = c(1,2)) par(mar = c(4.5, 5.5, 2, 0)) hist(df3$af , freq = T , breaks = 30 , xlab = "Minor Allele Frequency (MAF)" , ylab = "Frequency" , main = paste0(nrow(df3),"_pnca_mutations") , cex.lab = 1.3 , cex.axis = 1.3 , cex.main = 1.5 , cex.sub = 1.5) hist(df2$af , freq = T , breaks = 30 , xlab = "Minor Allele Frequency (MAF)" , ylab = "Frequency" , main = paste0(nrow(df2),"_pnca_samples") , cex.lab = 1.3 , cex.axis = 1.3 , cex.main = 1.5 , cex.sub = 1.5) dev.off() ########################################################