diff --git a/scripts/plotting/corr_plots.R b/scripts/plotting/corr_plots.R index 00c2c5a..425c695 100644 --- a/scripts/plotting/corr_plots.R +++ b/scripts/plotting/corr_plots.R @@ -261,3 +261,5 @@ OutPlot2 = pairs.panels(my_corr_lig[1:(length(my_corr_lig)-1)] print(OutPlot2) dev.off() ####################################################### + + diff --git a/scripts/plotting/corr_plots_foldx.R b/scripts/plotting/corr_plots_foldx.R new file mode 100644 index 0000000..d4f1efa --- /dev/null +++ b/scripts/plotting/corr_plots_foldx.R @@ -0,0 +1,189 @@ +#!/usr/bin/env Rscript +######################################################### +# TASK: Corr plots for PS and Lig + +# Output: 1 svg + +#======================================================================= +# working dir and loading libraries +getwd() +setwd("~/git/LSHTM_analysis/scripts/plotting/") +getwd() + + +source("Header_TT.R") +require(cowplot) +source("combining_dfs_plotting.R") # FIXME: add extra from other plots here + +# should return the following dfs, directories and variables + + +#======= +# output +#======= +# can't combine by cowplot because not ggplots +#corr_plot_combined = "corr_combined.svg" +#plot_corr_plot_combined = paste0(plotdir,"/", corr_plot_combined) + +# PS foldx +corr_foldx = "corr_foldx.svg" +plot_corr_foldx = paste0(plotdir,"/", corr_foldx) + +#################################################################### +# end of loading libraries and functions # +######################################################################## + +#%%%%%%%%%%%%%%%%%%%%%%%%% +df_ps = merged_df3 +#%%%%%%%%%%%%%%%%%%%%%%%%% + +rm( merged_df2, merged_df2_comp, merged_df2_lig + , merged_df2_comp_lig + , merged_df3_comp, merged_df3_comp_lig + , my_df_u, my_df_u_lig) + +######################################################################## +# end of data extraction and cleaning for plots # +######################################################################## + +#=========================== +# Data for Correlation plots:foldx +#=========================== +#============================ +# adding foldx scaled values +# scale data b/w -1 and 1 +#============================ +n = which(colnames(df_ps) == "ddg"); n + +my_min = min(df_ps[,n]); my_min +my_max = max(df_ps[,n]); my_max + +df_ps$foldx_scaled = ifelse(df_ps[,n] < 0 + , df_ps[,n]/abs(my_min) + , df_ps[,n]/my_max) +# sanity check +my_min = min(df_ps$foldx_scaled); my_min +my_max = max(df_ps$foldx_scaled); my_max + +if (my_min == -1 && my_max == 1){ + cat("PASS: foldx ddg successfully scaled b/w -1 and 1" + , "\nProceeding with assigning foldx outcome category") +}else{ + cat("FAIL: could not scale foldx ddg values" + , "Aborting!") +} + + +#================================ +# adding foldx outcome category +# ddg<0 = "Stabilising" (-ve) +#================================= + +c1 = table(df_ps$ddg < 0) +df_ps$foldx_outcome = ifelse(df_ps$ddg < 0, "Stabilising", "Destabilising") +c2 = table(df_ps$ddg < 0) + +if ( all(c1 == c2) ){ + cat("PASS: foldx outcome successfully created") +}else{ + cat("FAIL: foldx outcome could not be created. Aborting!") + exit() +} + +table(df_ps$foldx_outcome) + +# adding log cols +df_ps$log10_or_mychisq = log10(df_ps$or_mychisq) +df_ps$neglog_pval_fisher = -log10(df_ps$pval_fisher) + +df_ps$log10_or_kin = log10(df_ps$or_kin) +df_ps$neglog_pwald_kin = -log10(df_ps$pwald_kin) + +# subset data to generate pairwise correlations +cols_to_select_foldx = c("foldx_scaled" + + , "duet_scaled" + + , "log10_or_mychisq" + , "neglog_pval_fisher" + + , "log10_or_kin" + , "neglog_pwald_kin" + + , "af" + + , "foldx_outcome" + , drug) + +corr_data_foldx = df_ps[, cols_to_select_foldx] + +dim(corr_data_foldx) + +#p_italic = substitute(paste("-Log(", italic('P'), ")"));p_italic +#p_adjusted_italic = substitute(paste("-Log(", italic('P adjusted'), ")"));p_adjusted_italic + +# assign nice colnames (for display) +my_corr_colnames_foldx = c("Foldx" + + ,"DUET" + + , "Log(OR)" + , "-Log(P)" + + , "Log(OR adjusted)" + , "-Log(P wald)" + + , "AF" + + , "foldx_outcome" + , drug) + +length(my_corr_colnames_foldx) + +colnames(corr_data_foldx) +colnames(corr_data_foldx) <- my_corr_colnames_foldx +colnames(corr_data_foldx) + +#----------------- +# generate corr foldx plot +#----------------- +start = 1 +end = which(colnames(corr_data_foldx) == drug); end # should be the last column +offset = 1 + +my_corr_foldx = corr_data_foldx[start:(end-offset)] +head(my_corr_foldx) + +#my_cols = c("#f8766d", "#00bfc4") +# deep blue :#007d85 +# deep red: #ae301e + +cat("Corr plot foldx:", plot_corr_foldx) +svg(plot_corr_foldx, width = 15, height = 15) + +OutPlot_foldx= pairs.panels(my_corr_foldx[1:(length(my_corr_foldx)-1)] + , method = "spearman" # correlation method + , hist.col = "grey" ##00AFBB + , density = TRUE # show density plots + , ellipses = F # show correlation ellipses + , stars = T + , rug = F + , breaks = "Sturges" + , show.points = T + , bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_foldx$foldx_outcome))] + , pch = 21 + , jitter = T + #, alpha = .05 + #, points(pch = 19, col = c("#f8766d", "#00bfc4")) + , cex = 3 + , cex.axis = 2.5 + , cex.labels = 2.1 + , cex.cor = 1 + , smooth = F +) + +print(OutPlot_foldx) +dev.off() + + + diff --git a/scripts/plotting/other_plots_data.R b/scripts/plotting/other_plots_data.R index 60844aa..c89a010 100644 --- a/scripts/plotting/other_plots_data.R +++ b/scripts/plotting/other_plots_data.R @@ -13,6 +13,7 @@ getwd() library(ggplot2) library(data.table) library(dplyr) +library(tidyverse) source("combining_dfs_plotting.R") rm(merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig