From 369c906a33f11f09f12fbe3b8d32c7840d23fe9e Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Fri, 18 Sep 2020 11:56:19 +0100 Subject: [PATCH] added ggcorr plots combined for all params --- scripts/plotting/ggcorr_all_PS_LIG.R | 389 +++++++++++++++++++++++++++ 1 file changed, 389 insertions(+) create mode 100644 scripts/plotting/ggcorr_all_PS_LIG.R diff --git a/scripts/plotting/ggcorr_all_PS_LIG.R b/scripts/plotting/ggcorr_all_PS_LIG.R new file mode 100644 index 0000000..aa35922 --- /dev/null +++ b/scripts/plotting/ggcorr_all_PS_LIG.R @@ -0,0 +1,389 @@ +#!/usr/bin/env Rscript +######################################################### +# TASK: Corr plots for PS and Lig + +# Output: 1 svg + +#======================================================================= +# working dir and loading libraries +getwd() +setwd("~/git/LSHTM_analysis/scripts/plotting/") +getwd() + + +source("Header_TT.R") +require(cowplot) +source("combining_dfs_plotting.R") +source("my_pairs_panel.R") +# should return the following dfs, directories and variables + +# PS combined: +# 1) merged_df2 +# 2) merged_df2_comp +# 3) merged_df3 +# 4) merged_df3_comp + +# LIG combined: +# 5) merged_df2_lig +# 6) merged_df2_comp_lig +# 7) merged_df3_lig +# 8) merged_df3_comp_lig + +# 9) my_df_u +# 10) my_df_u_lig + +cat(paste0("Directories imported:" + , "\ndatadir:", datadir + , "\nindir:", indir + , "\noutdir:", outdir + , "\nplotdir:", plotdir)) + +cat(paste0("Variables imported:" + , "\ndrug:", drug + , "\ngene:", gene + , "\ngene_match:", gene_match + , "\nAngstrom symbol:", angstroms_symbol + , "\nNo. of duplicated muts:", dup_muts_nu + , "\nNA count for ORs:", na_count + , "\nNA count in df2:", na_count_df2 + , "\nNA count in df3:", na_count_df3)) + +#======= +# output +#======= +# can't combine by cowplot because not ggplots +#corr_plot_combined = "corr_combined.svg" +#plot_corr_plot_combined = paste0(plotdir,"/", corr_plot_combined) + +# PS +ggcorr_all_ps = "ggcorr_all_PS.svg" +plot_ggcorr_all_ps = paste0(plotdir,"/", ggcorr_all_ps) + +# LIG +ggcorr_all_lig = "ggcorr_all_LIG.svg" +plot_ggcorr_all_lig = paste0(plotdir,"/", ggcorr_all_lig ) + +# combined +ggcorr_all_combined_labelled = "ggcorr_all_combined_labelled.svg" +plot_ggcorr_all_combined_labelled = paste0(plotdir,"/", ggcorr_all_combined_labelled) + +#################################################################### +# end of loading libraries and functions # +######################################################################## + +#%%%%%%%%%%%%%%%%%%%%%%%%% +#df_ps = merged_df3_comp +#df_lig = merged_df3_comp_lig + +df_ps = merged_df3 +df_lig = merged_df3_lig + +#%%%%%%%%%%%%%%%%%%%%%%%%% + +rm( merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig, my_df_u, my_df_u_lig) + +######################################################################## +# end of data extraction and cleaning for plots # +######################################################################## + +#=========================== +# Data for Correlation plots:PS +#=========================== +table(df_ps$duet_outcome) + + +#=========================== +# Data for Correlation plots:foldx +#=========================== +#============================ +# adding foldx scaled values +# scale data b/w -1 and 1 +#============================ +n = which(colnames(df_ps) == "ddg"); n + +my_min = min(df_ps[,n]); my_min +my_max = max(df_ps[,n]); my_max + +df_ps$foldx_scaled = ifelse(df_ps[,n] < 0 + , df_ps[,n]/abs(my_min) + , df_ps[,n]/my_max) +# sanity check +my_min = min(df_ps$foldx_scaled); my_min +my_max = max(df_ps$foldx_scaled); my_max + +if (my_min == -1 && my_max == 1){ + cat("PASS: foldx ddg successfully scaled b/w -1 and 1" + , "\nProceeding with assigning foldx outcome category") +}else{ + cat("FAIL: could not scale foldx ddg values" + , "Aborting!") +} + + +#================================ +# adding foldx outcome category +# ddg<0 = "Stabilising" (-ve) +#================================= + +c1 = table(df_ps$ddg < 0) +df_ps$foldx_outcome = ifelse(df_ps$ddg < 0, "Stabilising", "Destabilising") +c2 = table(df_ps$ddg < 0) + +if ( all(c1 == c2) ){ + cat("PASS: foldx outcome successfully created") +}else{ + cat("FAIL: foldx outcome could not be created. Aborting!") + exit() +} + +table(df_ps$foldx_outcome) + + +#====================== +# adding log cols +#====================== + +df_ps$log10_or_mychisq = log10(df_ps$or_mychisq) +df_ps$neglog_pval_fisher = -log10(df_ps$pval_fisher) + +df_ps$log10_or_kin = log10(df_ps$or_kin) +df_ps$neglog_pwald_kin = -log10(df_ps$pwald_kin) + +# subset data to generate pairwise correlations +cols_to_select = c("duet_scaled" + + , "foldx_scaled" + + , "log10_or_mychisq" + , "neglog_pval_fisher" + + , "or_kin" + , "neglog_pwald_kin" + + , "af" + + , "asa" + , "rsa" + , "kd_values" + , "rd_values" + + , "duet_outcome" + , drug) + +corr_data_ps = df_ps[, cols_to_select] + +dim(corr_data_ps) + +#p_italic = substitute(paste("-Log(", italic('P'), ")"));p_italic +#p_adjusted_italic = substitute(paste("-Log(", italic('P adjusted'), ")"));p_adjusted_italic + +# assign nice colnames (for display) +my_corr_colnames = c("DUET" + + , "Foldx" + + , "Log (OR)" + , "-Log (P)" + + , "OR (adjusted)" + , "-Log (P wald)" + + , "AF" + + , "ASA" + , "RSA" + , "KD" + , "RD" + + , "duet_outcome" + , drug) + +length(my_corr_colnames) + +colnames(corr_data_ps) +colnames(corr_data_ps) <- my_corr_colnames +colnames(corr_data_ps) + +#------------------------ +# Data for ggcorr PS plot +#------------------------ +start = 1 +end_ggcorr = which(colnames(corr_data_ps) == "duet_outcome"); end_ggcorr # should be the last column +offset = 1 + +my_ggcorr_ps = corr_data_ps[start:(end_ggcorr-1)] +head(my_ggcorr_ps) + +# correlation matrix +corr1 <- round(cor(my_ggcorr_ps, method = "spearman", use = "pairwise.complete.obs"), 1) + +# p-value matrix +pmat1 <- cor_pmat(my_ggcorr_ps, method = "spearman", use = "pairwise.complete.obs") + +corr2 = psych::corr.test(my_ggcorr_ps + , method = "spearman" + , use = "pairwise.complete.obs")$r +corr2 = round(corr2, 1) + +pmat2 = psych::corr.test(my_ggcorr_ps + , method = "spearman" + , adjust = "none" + , use = "pairwise.complete.obs")$p + +corr1== corr2 +pmat1==pmat2 + +#------------------------ +# Generate ggcorr PS plot +#------------------------ +cat("ggCorr plot PS:", plot_ggcorr_all_ps) +svg(plot_ggcorr_all_ps, width = 15, height = 15) + +ggcorr_ps = ggcorrplot(corr1 + , p.mat = pmat1 + , hc.order = TRUE + , outline.col = "black" + , ggtheme = ggplot2::theme_gray + , colors = c("#6D9EC1", "white", "#E46726") + , title = "Spearman correlations with DUET and Foldx stability") + + +ggcorr_ps +#dev.off() + +#=========================== +# Data for Correlation plots: LIG +#=========================== +table(df_lig$ligand_outcome) + +df_lig$log10_or_mychisq = log10(df_lig$or_mychisq) +df_lig$neglog_pval_fisher = -log10(df_lig$pval_fisher) + + +df_lig$log10_or_kin = log10(df_lig$or_kin) +df_lig$neglog_pwald_kin = -log10(df_lig$pwald_kin) + +# subset data to generate pairwise correlations +cols_to_select_lig = c("affinity_scaled" + + , "log10_or_mychisq" + , "neglog_pval_fisher" + + , "or_kin" + , "neglog_pwald_kin" + + , "af" + + , "asa" + , "rsa" + , "kd_values" + , "rd_values" + + , "ligand_outcome" + , drug) + +corr_data_lig = df_lig[, cols_to_select_lig] + +dim(corr_data_lig) + +# assign nice colnames (for display) +my_corr_colnames_lig = c("Ligand Affinity" + + , "Log (OR)" + , "-Log (P)" + + , "OR (adjusted)" + , "-Log(P wald)" + + , "AF" + + , "ASA" + , "RSA" + , "KD" + , "RD" + + , "ligand_outcome" + , drug) + +length(my_corr_colnames) + +colnames(corr_data_lig) +colnames(corr_data_lig) <- my_corr_colnames_lig +colnames(corr_data_lig) + +#------------------------ +# Data for ggcorr LIG plot +#------------------------ + +start = 1 +end_ggcorr_lig = which(colnames(corr_data_lig) == "ligand_outcome"); end_ggcorr_lig # should be the last column +offset = 1 + +my_ggcorr_lig = corr_data_lig[start:(end_ggcorr_lig-1)] +head(my_ggcorr_lig); str(my_ggcorr_lig) + +# correlation matrix +corr1_lig <- round(cor(my_ggcorr_lig, method = "spearman", use = "pairwise.complete.obs"), 1) + +# p-value matrix +pmat1_lig <- cor_pmat(my_ggcorr_lig, method = "spearman", use = "pairwise.complete.obs") + +corr2_lig = psych::corr.test(my_ggcorr_lig + , method = "spearman" + , use = "pairwise.complete.obs")$r + +corr2_lig = round(corr2_lig, 1) + +pmat2_lig = psych::corr.test(my_ggcorr_lig + , method = "spearman" + , adjust = "none" + , use = "pairwise.complete.obs")$p + +corr1_lig == corr2_lig +pmat1_lig == pmat2_lig + + +# for display order columns by hc order of ps + +#col_order = levels(ggcorr_ps$data[2]) + +#col_order <- c("Species", "Petal.Width", "Sepal.Length", + #"Sepal.Width", "Petal.Length") +#my_data2 <- my_data[, col_order] +#my_data2 + +#------------------------ +# Generate ggcorr LIG plot +#------------------------ +cat("ggCorr LIG plot:", plot_ggcorr_all_lig) +svg(plot_ggcorr_all_lig, width = 15, height = 15) + +ggcorr_lig = ggcorrplot(corr1_lig + , p.mat = pmat1_lig + , hc.order = TRUE + , outline.col = "black" + + , ggtheme = ggplot2::theme_gray + , colors = c("#6D9EC1", "white", "#E46726") + , title = "Spearman correlations with ligand affinty") + + +ggcorr_lig +#dev.off() + +####################################################### +#============================= +# combine plots for output +#============================= +cat("Output plot:", plot_ggcorr_all_combined_labelled) +svg(plot_ggcorr_all_combined_labelled , width = 30, height = 10) + +ggcorr_combined = cowplot::plot_grid(ggcorr_ps, ggcorr_lig + , nrow = 1 + , align = "h" + , labels = c("(a)","(b)") + #, rel_heights = c(1, 1) + #, rel_widths = c(0.85, 1, 1) + , label_size = 25) + +dev.off()