#!/usr/bin/env Rscript ######################################################### # TASK: Corr plots for PS and Lig # Output: 1 svg #======================================================================= # working dir and loading libraries getwd() setwd("~/git/LSHTM_analysis/scripts/plotting/") getwd() source("Header_TT.R") require(cowplot) source("combining_dfs_plotting.R") source("my_pairs_panel.R") # with lower panel turned off # should return the following dfs, directories and variables # PS combined: # 1) merged_df2 # 2) merged_df2_comp # 3) merged_df3 # 4) merged_df3_comp # LIG combined: # 5) merged_df2_lig # 6) merged_df2_comp_lig # 7) merged_df3_lig # 8) merged_df3_comp_lig # 9) my_df_u # 10) my_df_u_lig cat(paste0("Directories imported:" , "\ndatadir:", datadir , "\nindir:", indir , "\noutdir:", outdir , "\nplotdir:", plotdir)) cat(paste0("Variables imported:" , "\ndrug:", drug , "\ngene:", gene , "\ngene_match:", gene_match , "\nAngstrom symbol:", angstroms_symbol , "\nNo. of duplicated muts:", dup_muts_nu , "\nNA count for ORs:", na_count , "\nNA count in df2:", na_count_df2 , "\nNA count in df3:", na_count_df3)) #======= # output #======= # can't combine by cowplot because not ggplots #corr_plot_combined = "corr_combined.svg" #plot_corr_plot_combined = paste0(plotdir,"/", corr_plot_combined) # PS corr_ps = "corr_PS.svg" plot_corr_ps = paste0(plotdir,"/", corr_ps) corr_ps_duet_col = "corr_PS_duet_coloured.svg" plot_corr_ps_duet_col = paste0(plotdir,"/", corr_ps_duet_col) corr_upper_ps = "corr_upper_PS.svg" plot_corr_upper_ps = paste0(plotdir,"/", corr_upper_ps) # LIG corr_lig = "corr_LIG.svg" plot_corr_lig = paste0(plotdir,"/", corr_lig) corr_upper_lig = "corr_upper_LIG.svg" plot_corr_upper_lig = paste0(plotdir,"/", corr_upper_lig) #################################################################### # end of loading libraries and functions # ######################################################################## #%%%%%%%%%%%%%%%%%%%%%%%%% df_ps = merged_df3_comp df_lig = merged_df3_comp_lig #%%%%%%%%%%%%%%%%%%%%%%%%% rm( merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig, my_df_u, my_df_u_lig) ######################################################################## # end of data extraction and cleaning for plots # ######################################################################## #============================ # adding foldx scaled values # scale data b/w -1 and 1 #============================ n = which(colnames(df_ps) == "ddg"); n my_min = min(df_ps[,n]); my_min my_max = max(df_ps[,n]); my_max df_ps$foldx_scaled = ifelse(df_ps[,n] < 0 , df_ps[,n]/abs(my_min) , df_ps[,n]/my_max) # sanity check my_min = min(df_ps$foldx_scaled); my_min my_max = max(df_ps$foldx_scaled); my_max if (my_min == -1 && my_max == 1){ cat("PASS: foldx ddg successfully scaled b/w -1 and 1" , "\nProceeding with assigning foldx outcome category") }else{ cat("FAIL: could not scale foldx ddg values" , "Aborting!") } #================================ # adding foldx outcome category # ddg<0 = "Stabilising" (-ve) #================================= c1 = table(df_ps$ddg < 0) df_ps$foldx_outcome = ifelse(df_ps$ddg < 0, "Stabilising", "Destabilising") c2 = table(df_ps$ddg < 0) if ( all(c1 == c2) ){ cat("PASS: foldx outcome successfully created") }else{ cat("FAIL: foldx outcome could not be created. Aborting!") exit() } table(df_ps$foldx_outcome) #====================== # adding log cols #====================== df_ps$log10_or_mychisq = log10(df_ps$or_mychisq) df_ps$neglog_pval_fisher = -log10(df_ps$pval_fisher) df_ps$log10_or_kin = log10(df_ps$or_kin) df_ps$neglog_pwald_kin = -log10(df_ps$pwald_kin) #=========================== # Data for Correlation plots:PS #=========================== # subset data to generate pairwise correlations cols_to_select = c("duet_scaled" , "foldx_scaled" , "log10_or_mychisq" , "neglog_pval_fisher" , "af" , "duet_outcome" , drug) corr_data_ps = df_ps[cols_to_select] dim(corr_data_ps) #p_italic = substitute(paste("-Log(", italic('P'), ")"));p_italic #p_adjusted_italic = substitute(paste("-Log(", italic('P adjusted'), ")"));p_adjusted_italic # assign nice colnames (for display) my_corr_colnames = c("DUET" , "Foldx" , "Log (OR)" , "-Log (P)" , "AF" , "duet_outcome" , drug) length(my_corr_colnames) colnames(corr_data_ps) colnames(corr_data_ps) <- my_corr_colnames colnames(corr_data_ps) start = 1 end = which(colnames(corr_data_ps) == drug); end # should be the last column offset = 1 my_corr_ps = corr_data_ps[start:(end-offset)] head(my_corr_ps) #my_cols = c("#f8766d", "#00bfc4") # deep blue :#007d85 # deep red: #ae301e #--------------------------------------- # generate corr PS plot 1: both panels #--------------------------------------- cat("Corr plot PS DUET with coloured dots:", plot_corr_ps) svg(plot_corr_ps, width = 15, height = 15) #cat("Corr plot PS DUET with coloured dots:",plot_corr_ps_duet_col) #svg(plot_corr_ps_duet_col, width = 15, height = 15) OutPlot1 = pairs.panels(my_corr_ps[1:(length(my_corr_ps)-1)] , method = "spearman" # correlation method , hist.col = "grey" ##00AFBB , density = TRUE # show density plots , ellipses = F # show correlation ellipses , stars = T , rug = F , breaks = "Sturges" , show.points = T #, bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_ps$duet_outcome))] # can't use colour as duet and foldx are opposite #, pch = 21 # for bg , pch = 19 , jitter = T , alpha = 1 #, points(pch = 19, col = c("#f8766d", "#00bfc4")) , cex = 1.8 , cex.axis = 2 , cex.labels = 4 , cex.cor = 1 , smooth = F ) print(OutPlot1) dev.off() #---------------------------------------------- # generate corr PS plot 2: upper_panel only #---------------------------------------------- cat("Corr plot upper PS:", plot_corr_upper_ps) svg(plot_corr_upper_ps, width = 15, height = 15) OutPlot1_upper = my_pp(my_corr_ps[1:(length(my_corr_ps)-1)] # no lower panel , method = "spearman" # correlation method , hist.col = "grey" ##00AFBB , density = TRUE # show density plots , ellipses = F # show correlation ellipses , stars = T , rug = F , breaks = "Sturges" , show.points = F , cex = 3 , cex.axis = 1.6 , cex.labels = 4 , cex.cor = 1 , smooth = F ) print(OutPlot1_upper) dev.off() ################################################################################################ #=========================== # Data for Correlation plots: LIG #=========================== table(df_lig$ligand_outcome) df_lig$log10_or_mychisq = log10(df_lig$or_mychisq) df_lig$neglog_pval_fisher = -log10(df_lig$pval_fisher) df_lig$log10_or_kin = log10(df_lig$or_kin) df_lig$neglog_pwald_kin = -log10(df_lig$pwald_kin) # subset data to generate pairwise correlations cols_to_select = c("affinity_scaled" , "log10_or_mychisq" , "neglog_pval_fisher" , "af" , "ligand_outcome" , drug) corr_data_lig = df_lig[, cols_to_select] dim(corr_data_lig) # assign nice colnames (for display) my_corr_colnames = c("Ligand Affinity" , "Log (OR)" , "-Log (P)" , "AF" , "ligand_outcome" , drug) length(my_corr_colnames) colnames(corr_data_lig) colnames(corr_data_lig) <- my_corr_colnames colnames(corr_data_lig) start = 1 end = which(colnames(corr_data_lig) == drug); end # should be the last column offset = 1 my_corr_lig = corr_data_lig[start:(end-offset)] head(my_corr_lig) #--------------------------------------- # generate corr LIG plot 1: both panels #--------------------------------------- cat("Corr LIG plot:", plot_corr_lig) svg(plot_corr_lig, width = 15, height = 15) # uncomment as necessary OutPlot2 = pairs.panels(my_corr_lig[1:(length(my_corr_lig)-1)] , method = "spearman" # correlation method , hist.col = "grey" ##00AFBB , density = TRUE # show density plots , ellipses = F # show correlation ellipses , stars = T , rug = F , breaks = "Sturges" , show.points = T #, bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_lig$ligand_outcome))] # can't use colour as duet and foldx are opposite #, pch = 21 # for bg , pch = 19 , jitter = T #, alpha = .05 #, points(pch = 19, col = c("#f8766d", "#00bfc4")) , cex = 2 , cex.axis = 2 , cex.labels = 4 , cex.cor = 1 , smooth = F ) print(OutPlot2) dev.off() #--------------------------------------- # generate corr LIG plot 2: upper panels #--------------------------------------- cat("Corr LIG plot:", plot_corr_upper_lig) svg(plot_corr_upper_lig, width = 15, height = 15) # uncomment as necessary OutPlot2_upper = my_pp(my_corr_lig[1:(length(my_corr_lig)-1)] , method = "spearman" # correlation method , hist.col = "grey" ##00AFBB , density = TRUE # show density plots , ellipses = F # show correlation ellipses , stars = T , rug = F , breaks = "Sturges" , show.points = F #, alpha = .05 #, points(pch = 19, col = c("#f8766d", "#00bfc4")) , cex = 3 , cex.axis = 1.6 , cex.labels = 4 , cex.cor = 1 , smooth = F ) print(OutPlot2_upper) dev.off() #######################################################