From 81796df71ab148dd25c2b37bdce5787a0d1183ba Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Tue, 29 Sep 2020 16:08:25 +0100 Subject: [PATCH] added corr_data.R corr_PS_LIG_all.R corr_PS_LIG_v2.R --- scripts/plotting/corr_PS_LIG_all.R | 166 +++++++++++++++++++++ scripts/plotting/corr_PS_LIG_v2.R | 176 ++++++++++++++++++++++ scripts/plotting/corr_data.R | 232 +++++++++++++++++++++++++++++ 3 files changed, 574 insertions(+) create mode 100644 scripts/plotting/corr_PS_LIG_all.R create mode 100644 scripts/plotting/corr_PS_LIG_v2.R create mode 100644 scripts/plotting/corr_data.R diff --git a/scripts/plotting/corr_PS_LIG_all.R b/scripts/plotting/corr_PS_LIG_all.R new file mode 100644 index 0000000..138ef11 --- /dev/null +++ b/scripts/plotting/corr_PS_LIG_all.R @@ -0,0 +1,166 @@ +#!/usr/bin/env Rscript +######################################################### +# TASK: Corr plots for PS and Lig + +# Output: 1 svg + +#======================================================================= +# working dir and loading libraries +getwd() +setwd("~/git/LSHTM_analysis/scripts/plotting/") +getwd() + +#source("combining_dfs_plotting.R") +source("my_pairs_panel.R") # with lower panel turned off +source("corr_data.R") + +#======= +# output +#======= +# PS +corr_ps_all_df2 = "corr_PS_ALL_df2.svg" +plot_corr_ps_all_df2 = paste0(plotdir,"/", corr_ps_all_df2) + +corr_ps_all_df3 = "corr_PS_ALL_df3.svg" +plot_corr_ps_all_df3 = paste0(plotdir,"/", corr_ps_all_df3) + +# LIG +corr_lig_all_df2 = "corr_LIG_ALL_df2.svg" +plot_corr_lig_all_df2 = paste0(plotdir,"/", corr_lig_all_df2) + +corr_lig_all_df3 = "corr_LIG_ALL_df3.svg" +plot_corr_lig_all_df3 = paste0(plotdir,"/", corr_lig_all_df3) + +#################################################################### +# end of loading libraries and functions +#################################################################### +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Data for plots +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +# PS +corr_ps_df2 = corr_ps_df2[-1] +corr_ps_df3 = corr_ps_df3[-1] + +# Lig +corr_lig_df2 = corr_lig_df2[-1] +corr_lig_df3 = corr_lig_df3[-1] + +#--------------------------------------- +# generate corr PS plot 1: merged_df2 +#--------------------------------------- +cat("Corr plot PS DUET with coloured dots:", plot_corr_ps_all_df2) +svg(plot_corr_ps_all_df2, width = 30, height = 30) + +OutPlot_ps_df2 = pairs.panels(corr_ps_df2[1:(length(corr_ps_df2)-2)] + , method = "spearman" # correlation method + , hist.col = "grey" ##00AFBB + , density = T # show density plots + , ellipses = F # show correlation ellipses + , stars = T + , rug = F + , breaks = "Sturges" + , show.points = T + , bg = c("#f8766d", "#00bfc4")[unclass(factor(corr_ps_df2$duet_outcome))] # can't use colour as duet and foldx are opposite + , pch = 21 # for bg + #, pch = 19 + , jitter = T + , alpha = 1 + #, points(pch = 19, col = c("#f8766d", "#00bfc4")) + , cex = 1.8 + , cex.axis = 2 + , cex.labels = 2 + , cex.cor = 1 + , smooth = F) + +print(OutPlot_ps_df2) +dev.off() + +#---------------------------------------------- +# generate corr PS plot 2: merged_df3 +#---------------------------------------------- +cat("Corr plot PS DUET with coloured dots:", plot_corr_ps_all_df3) +svg(plot_corr_ps_all_df3, width = 30, height = 30) + +OutPlot_ps_df3 = pairs.panels(corr_ps_df3[1:(length(corr_ps_df3)-2)] + , method = "spearman" # correlation method + , hist.col = "grey" ##00AFBB + , density = T # show density plots + , ellipses = F # show correlation ellipses + , stars = T + , rug = F + , breaks = "Sturges" + , show.points = T + , bg = c("#f8766d", "#00bfc4")[unclass(factor(corr_ps_df3$duet_outcome))] # can't use colour as duet and foldx are opposite + , pch = 21 # for bg + , cex = 2 + , cex.axis = 1.6 + , cex.labels = 2 + , cex.cor = 1 + , smooth = F + +) + +print(OutPlot_ps_df3) +dev.off() + +################################################################################################ + + +#--------------------------------------- +# generate corr lig plot 1: merged_df2 +#--------------------------------------- +cat("Corr plot lig DUET with coloured dots:", plot_corr_lig_all_df2) +svg(plot_corr_lig_all_df2, width = 30, height = 30) + +OutPlot_lig_df2 = pairs.panels(corr_lig_df2[1:(length(corr_lig_df2)-2)] + , method = "spearman" # correlation method + , hist.col = "grey" ##00AFBB + , density = T # show density plots + , ellipses = F # show correlation elliliges + , stars = T + , rug = F + , breaks = "Sturges" + , show.points = T + , bg = c("#f8766d", "#00bfc4")[unclass(factor(corr_lig_df2$ligand_outcome))] # can't use colour as duet and foldx are opposite + , pch = 21 # for bg + #, pch = 19 + , jitter = T + , alpha = 1 + #, points(pch = 19, col = c("#f8766d", "#00bfc4")) + , cex = 1.8 + , cex.axis = 2 + , cex.labels = 2 + , cex.cor = 1 + , smooth = F) + +print(OutPlot_lig_df2) +dev.off() + +#---------------------------------------------- +# generate corr lig plot 2: merged_df3 +#---------------------------------------------- +cat("Corr plot lig DUET with coloured dots:", plot_corr_lig_all_df3) +svg(plot_corr_lig_all_df3, width = 30, height = 30) + +OutPlot_lig_df3 = pairs.panels(corr_lig_df3[1:(length(corr_lig_df3)-2)] + , method = "spearman" # correlation method + , hist.col = "grey" ##00AFBB + , density = T # show density plots + , ellipses = F # show correlation elliliges + , stars = T + , rug = F + , breaks = "Sturges" + , show.points = T + , bg = c("#f8766d", "#00bfc4")[unclass(factor(corr_lig_df3$ligand_outcome))] # can't use colour as duet and foldx are opposite + , pch = 21 # for bg + , cex = 2 + , cex.axis = 1.6 + , cex.labels = 2 + , cex.cor = 1 + , smooth = F + +) + +print(OutPlot_lig_df3) +dev.off() diff --git a/scripts/plotting/corr_PS_LIG_v2.R b/scripts/plotting/corr_PS_LIG_v2.R new file mode 100644 index 0000000..908b7d4 --- /dev/null +++ b/scripts/plotting/corr_PS_LIG_v2.R @@ -0,0 +1,176 @@ +#!/usr/bin/env Rscript +######################################################### +# TASK: Corr plots for PS and Lig + +# Output: 1 svg + +#======================================================================= +# working dir and loading libraries +getwd() +setwd("~/git/LSHTM_analysis/scripts/plotting/") +getwd() + +#source("combining_dfs_plotting.R") +source("my_pairs_panel.R") # with lower panel turned off +source("corr_data.R") + +#======= +# output +#======= +# PS +corrplot_ps_df2 = "corr_PS_df2.svg" +plot_corr_ps_df2 = paste0(plotdir,"/", corrplot_ps_df2) + +corrplot_ps_df3 = "corr_PS_df3.svg" +plot_corr_ps_df3 = paste0(plotdir,"/", corrplot_ps_df3) + +# LIG +corrplot_lig_df2 = "corr_LIG_df2.svg" +plot_corr_lig_df2 = paste0(plotdir,"/", corrplot_lig_df2) + +corrplot_lig_df3 = "corr_LIG_df3.svg" +plot_corr_lig_df3 = paste0(plotdir,"/", corrplot_lig_df3) + +#################################################################### +# end of loading libraries and functions +#################################################################### +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Data for plots +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +cols_to_drop = c("ASA", "AF_kin") + + +# PS +corr_ps_df2 = corr_ps_df2[!colnames(corr_ps_df2)%in%cols_to_drop] +corr_ps_df2 = corr_ps_df2[-1] + +corr_ps_df3 = corr_ps_df3[!colnames(corr_ps_df3)%in%cols_to_drop] +corr_ps_df3 = corr_ps_df3[-1] + + +# Lig +corr_lig_df2 = corr_lig_df2[!colnames(corr_lig_df2)%in%cols_to_drop] +corr_lig_df2 = corr_lig_df2[-1] + +corr_lig_df3 = corr_lig_df3[!colnames(corr_lig_df3)%in%cols_to_drop] +corr_lig_df3 = corr_lig_df3[-1] + +#--------------------------------------- +# generate corr PS plot 1: merged_df2 +#--------------------------------------- +cat("Corr plot PS DUET with coloured dots:", plot_corr_ps_df2) +svg(plot_corr_ps_df2, width = 30, height = 25) + +OutPlot_ps_df2 = pairs.panels(corr_ps_df2[1:(length(corr_ps_df2)-2)] + , method = "spearman" # correlation method + , hist.col = "grey" ##00AFBB + , density = T # show density plots + , ellipses = F # show correlation ellipses + , stars = T + , rug = F + , breaks = "Sturges" + , show.points = T + , bg = c("#f8766d", "#00bfc4")[unclass(factor(corr_ps_df2$duet_outcome))] # can't use colour as duet and foldx are opposite + , pch = 21 # for bg + #, pch = 19 + , jitter = T + , alpha = 1 + #, points(pch = 19, col = c("#f8766d", "#00bfc4")) + , cex = 1.8 + , cex.axis = 2 + , cex.labels = 3.8 + , cex.cor = 1 + , smooth = F) + +print(OutPlot_ps_df2) +dev.off() + +#---------------------------------------------- +# generate corr PS plot 2: merged_df3 +#---------------------------------------------- +cat("Corr plot PS DUET with coloured dots:", plot_corr_ps_df3) +svg(plot_corr_ps_df3, width = 30, height = 25) + +OutPlot_ps_df3 = pairs.panels(corr_ps_df3[1:(length(corr_ps_df3)-2)] + , method = "spearman" # correlation method + , hist.col = "grey" ##00AFBB + , density = T # show density plots + , ellipses = F # show correlation ellipses + , stars = T + , rug = F + , breaks = "Sturges" + , show.points = T + , bg = c("#f8766d", "#00bfc4")[unclass(factor(corr_ps_df3$duet_outcome))] # can't use colour as duet and foldx are opposite + , pch = 21 # for bg + , cex = 3 + , cex.axis = 1.6 + , cex.labels = 3.8 + , cex.cor = 1 + , smooth = F + +) + +print(OutPlot_ps_df3) +dev.off() + +################################################################################################ + +#--------------------------------------- +# generate corr lig plot 1: merged_df2 +#--------------------------------------- +cat("Corr plot lig DUET with coloured dots:", plot_corr_lig_df2) +svg(plot_corr_lig_df2, width = 30, height = 25) + +OutPlot_lig_df2 = pairs.panels(corr_lig_df2[1:(length(corr_lig_df2)-2)] + , method = "spearman" # correlation method + , hist.col = "grey" ##00AFBB + , density = T # show density plots + , ellipses = F # show correlation elliliges + , stars = T + , rug = F + , breaks = "Sturges" + , show.points = T + , bg = c("#f8766d", "#00bfc4")[unclass(factor(corr_lig_df2$ligand_outcome))] # can't use colour as duet and foldx are opposite + , pch = 21 # for bg + #, pch = 19 + , jitter = T + , alpha = 1 + #, points(pch = 19, col = c("#f8766d", "#00bfc4")) + , cex = 1.8 + , cex.axis = 2 + , cex.labels = 3.8 + , cex.cor = 1 + , smooth = F) + +print(OutPlot_lig_df2) +dev.off() + +#---------------------------------------------- +# generate corr lig plot 2: merged_df3 +#---------------------------------------------- +cat("Corr plot lig DUET with coloured dots:", plot_corr_lig_df3) +svg(plot_corr_lig_df3, width = 30, height = 25) + +OutPlot_lig_df3 = pairs.panels(corr_lig_df3[1:(length(corr_lig_df3)-2)] + , method = "spearman" # correlation method + , hist.col = "grey" ##00AFBB + , density = T # show density plots + , ellipses = F # show correlation elliliges + , stars = T + , rug = F + , breaks = "Sturges" + , show.points = T + , bg = c("#f8766d", "#00bfc4")[unclass(factor(corr_lig_df3$ligand_outcome))] # can't use colour as duet and foldx are opposite + , pch = 21 # for bg + , cex = 3 + , cex.axis = 1.6 + , cex.labels = 3.8 + , cex.cor = 1 + , smooth = F + +) + +print(OutPlot_lig_df3) +dev.off() + diff --git a/scripts/plotting/corr_data.R b/scripts/plotting/corr_data.R new file mode 100644 index 0000000..158c3c3 --- /dev/null +++ b/scripts/plotting/corr_data.R @@ -0,0 +1,232 @@ +#!/usr/bin/env Rscript +######################################################### +# TASK: Prepare for correlation data + +#======================================================================= +# working dir and loading libraries +getwd() +setwd("~/git/LSHTM_analysis/scripts/plotting/") +getwd() + +#source("Header_TT.R") +source("combining_dfs_plotting.R") +source("my_pairs_panel.R") # with lower panel turned off +# should return the following dfs, directories and variables + +# PS combined: +# 1) merged_df2 +# 2) merged_df2_comp +# 3) merged_df3 +# 4) merged_df3_comp + +# LIG combined: +# 5) merged_df2_lig +# 6) merged_df2_comp_lig +# 7) merged_df3_lig +# 8) merged_df3_comp_lig + +# 9) my_df_u +# 10) my_df_u_lig + +cat(paste0("Directories imported:" + , "\ndatadir:", datadir + , "\nindir:", indir + , "\noutdir:", outdir + , "\nplotdir:", plotdir)) + +cat(paste0("Variables imported:" + , "\ndrug:", drug + , "\ngene:", gene + , "\ngene_match:", gene_match + , "\nAngstrom symbol:", angstroms_symbol + , "\nNo. of duplicated muts:", dup_muts_nu + , "\nNA count for ORs:", na_count + , "\nNA count in df2:", na_count_df2 + , "\nNA count in df3:", na_count_df3)) + +#======= +# output +#======= +# corr_ps_df2 +# corr_lig_df2 + +#################################################################### +# end of loading libraries and functions +#################################################################### + +#%%%%%%%%%%%%%%%%%%%%%%%%% +#df_ps = merged_df3 +df_ps = merged_df2 + +#df_lig = merged_df3_lig +df_lig = merged_df2_lig + +#%%%%%%%%%%%%%%%%%%%%%%%%% + + +######################################################################## +# end of data extraction and cleaning for plots # +######################################################################## + +#====================== +# adding log cols +#====================== + +df_ps$log10_or_mychisq = log10(df_ps$or_mychisq) +df_ps$neglog_pval_fisher = -log10(df_ps$pval_fisher) + +df_ps$log10_or_kin = log10(df_ps$or_kin) +df_ps$neglog_pwald_kin = -log10(df_ps$pwald_kin) + +#df_ps$mutation_info_labels = ifelse(df_ps$mutation_info == dr_muts_col, 1, 0) + +#=========================== +# Data for Correlation plots:PS +#=========================== +# subset data to generate pairwise correlations +cols_to_select = c("mutationinformation" + , "duet_scaled" + , "foldx_scaled" + #, "mutation_info_labels" + , "asa" + , "rsa" + , "rd_values" + , "kd_values" + , "log10_or_mychisq" + , "neglog_pval_fisher" + , "or_kin" + , "neglog_pwald_kin" + , "af" + , "af_kin" + , "duet_outcome" + , drug) + +corr_data_ps = df_ps[cols_to_select] + +dim(corr_data_ps) + +# assign nice colnames (for display) +my_corr_colnames = c("Mutation" + , "DUET" + , "Foldx" + #, "Mutation class" + , "ASA" + , "RSA" + , "RD" + , "KD" + , "Log (OR)" + , "-Log (P)" + , "Adjusted (OR)" + , "-Log (P wald)" + , "AF" + , "AF_kin" + , "duet_outcome" + , drug) + +length(my_corr_colnames) + +colnames(corr_data_ps) +colnames(corr_data_ps) <- my_corr_colnames +colnames(corr_data_ps) + +start = 1 +end = which(colnames(corr_data_ps) == drug); end # should be the last column +offset = 1 + +#corr_ps_df2 = corr_data_ps[start:(end-offset)] # without drug +corr_ps_df2 = corr_data_ps[start:end] +head(corr_ps_df2) + +#----------------- +# short_df ps: merged_df3 +#----------------- +corr_ps_df3 = corr_ps_df2[!duplicated(corr_ps_df2$Mutation),] + +na_or = sum(is.na(corr_ps_df3$`Log (OR)`)) +check1 = nrow(corr_ps_df3) - na_or + +na_adj_or = sum(is.na(corr_ps_df3$`adjusted (OR)`)) +check2 = nrow(corr_ps_df3) - na_adj_or + +#if ( nrow(corr_ps_df3) == nrow(merged_df3) ) { +# cat( "PASS: No. of rows for corr_ps_df3 match" ) +#}if ( nrow(merged_df3_comp) == check1 ){ +# cat( "PASS: No. of OR values checked" ) +#} + +################################################################################################ +#=========================== +# Data for Correlation plots: LIG +#=========================== +table(df_lig$ligand_outcome) + +df_lig$log10_or_mychisq = log10(df_lig$or_mychisq) +df_lig$neglog_pval_fisher = -log10(df_lig$pval_fisher) + +df_lig$log10_or_kin = log10(df_lig$or_kin) +df_lig$neglog_pwald_kin = -log10(df_lig$pwald_kin) + +# subset data to generate pairwise correlations +cols_to_select = c("mutationinformation" + , "affinity_scaled" + #, "mutation_info_labels" + , "asa" + , "rsa" + , "rd_values" + , "kd_values" + , "log10_or_mychisq" + , "neglog_pval_fisher" + , "or_kin" + , "neglog_pwald_kin" + , "af" + , "af_kin" + , "ligand_outcome" + , drug) + +corr_data_lig = df_lig[, cols_to_select] + + +dim(corr_data_lig) + +# assign nice colnames (for display) +my_corr_colnames = c("Mutation" + , "Ligand Affinity" + #, "Mutation class" + , "ASA" + , "RSA" + , "RD" + , "KD" + , "Log (OR)" + , "-Log (P)" + , "Adjusted (OR)" + , "-Log (P wald)" + , "AF" + , "AF_kin" + , "ligand_outcome" + , drug) + +length(my_corr_colnames) + +colnames(corr_data_lig) +colnames(corr_data_lig) <- my_corr_colnames +colnames(corr_data_lig) + +start = 1 +end = which(colnames(corr_data_lig) == drug); end # should be the last column +offset = 1 + +#corr_lig_df2 = corr_data_lig[start:(end-offset)] # without drug +corr_lig_df2 = corr_data_lig[start:end] +head(corr_lig_df2) + + +#----------------- +# short_df lig: merged_df3_lig +#----------------- + +corr_lig_df3 = corr_lig_df2[!duplicated(corr_lig_df2$Mutation),] + +####################################################### +rm(merged_df2, merged_df2_lig, merged_df3, merged_df3_lig + , merged_df2_comp , merged_df3_comp, merged_df2_comp_lig, merged_df3_comp_lig + , corr_data_ps, corr_data_lig) \ No newline at end of file