added ggcorr plots combined for all params

This commit is contained in:
Tanushree Tunstall 2020-09-18 11:56:19 +01:00
parent 24b1cc2440
commit 369c906a33

View file

@ -0,0 +1,389 @@
#!/usr/bin/env Rscript
#########################################################
# TASK: Corr plots for PS and Lig
# Output: 1 svg
#=======================================================================
# working dir and loading libraries
getwd()
setwd("~/git/LSHTM_analysis/scripts/plotting/")
getwd()
source("Header_TT.R")
require(cowplot)
source("combining_dfs_plotting.R")
source("my_pairs_panel.R")
# should return the following dfs, directories and variables
# PS combined:
# 1) merged_df2
# 2) merged_df2_comp
# 3) merged_df3
# 4) merged_df3_comp
# LIG combined:
# 5) merged_df2_lig
# 6) merged_df2_comp_lig
# 7) merged_df3_lig
# 8) merged_df3_comp_lig
# 9) my_df_u
# 10) my_df_u_lig
cat(paste0("Directories imported:"
, "\ndatadir:", datadir
, "\nindir:", indir
, "\noutdir:", outdir
, "\nplotdir:", plotdir))
cat(paste0("Variables imported:"
, "\ndrug:", drug
, "\ngene:", gene
, "\ngene_match:", gene_match
, "\nAngstrom symbol:", angstroms_symbol
, "\nNo. of duplicated muts:", dup_muts_nu
, "\nNA count for ORs:", na_count
, "\nNA count in df2:", na_count_df2
, "\nNA count in df3:", na_count_df3))
#=======
# output
#=======
# can't combine by cowplot because not ggplots
#corr_plot_combined = "corr_combined.svg"
#plot_corr_plot_combined = paste0(plotdir,"/", corr_plot_combined)
# PS
ggcorr_all_ps = "ggcorr_all_PS.svg"
plot_ggcorr_all_ps = paste0(plotdir,"/", ggcorr_all_ps)
# LIG
ggcorr_all_lig = "ggcorr_all_LIG.svg"
plot_ggcorr_all_lig = paste0(plotdir,"/", ggcorr_all_lig )
# combined
ggcorr_all_combined_labelled = "ggcorr_all_combined_labelled.svg"
plot_ggcorr_all_combined_labelled = paste0(plotdir,"/", ggcorr_all_combined_labelled)
####################################################################
# end of loading libraries and functions #
########################################################################
#%%%%%%%%%%%%%%%%%%%%%%%%%
#df_ps = merged_df3_comp
#df_lig = merged_df3_comp_lig
df_ps = merged_df3
df_lig = merged_df3_lig
#%%%%%%%%%%%%%%%%%%%%%%%%%
rm( merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig, my_df_u, my_df_u_lig)
########################################################################
# end of data extraction and cleaning for plots #
########################################################################
#===========================
# Data for Correlation plots:PS
#===========================
table(df_ps$duet_outcome)
#===========================
# Data for Correlation plots:foldx
#===========================
#============================
# adding foldx scaled values
# scale data b/w -1 and 1
#============================
n = which(colnames(df_ps) == "ddg"); n
my_min = min(df_ps[,n]); my_min
my_max = max(df_ps[,n]); my_max
df_ps$foldx_scaled = ifelse(df_ps[,n] < 0
, df_ps[,n]/abs(my_min)
, df_ps[,n]/my_max)
# sanity check
my_min = min(df_ps$foldx_scaled); my_min
my_max = max(df_ps$foldx_scaled); my_max
if (my_min == -1 && my_max == 1){
cat("PASS: foldx ddg successfully scaled b/w -1 and 1"
, "\nProceeding with assigning foldx outcome category")
}else{
cat("FAIL: could not scale foldx ddg values"
, "Aborting!")
}
#================================
# adding foldx outcome category
# ddg<0 = "Stabilising" (-ve)
#=================================
c1 = table(df_ps$ddg < 0)
df_ps$foldx_outcome = ifelse(df_ps$ddg < 0, "Stabilising", "Destabilising")
c2 = table(df_ps$ddg < 0)
if ( all(c1 == c2) ){
cat("PASS: foldx outcome successfully created")
}else{
cat("FAIL: foldx outcome could not be created. Aborting!")
exit()
}
table(df_ps$foldx_outcome)
#======================
# adding log cols
#======================
df_ps$log10_or_mychisq = log10(df_ps$or_mychisq)
df_ps$neglog_pval_fisher = -log10(df_ps$pval_fisher)
df_ps$log10_or_kin = log10(df_ps$or_kin)
df_ps$neglog_pwald_kin = -log10(df_ps$pwald_kin)
# subset data to generate pairwise correlations
cols_to_select = c("duet_scaled"
, "foldx_scaled"
, "log10_or_mychisq"
, "neglog_pval_fisher"
, "or_kin"
, "neglog_pwald_kin"
, "af"
, "asa"
, "rsa"
, "kd_values"
, "rd_values"
, "duet_outcome"
, drug)
corr_data_ps = df_ps[, cols_to_select]
dim(corr_data_ps)
#p_italic = substitute(paste("-Log(", italic('P'), ")"));p_italic
#p_adjusted_italic = substitute(paste("-Log(", italic('P adjusted'), ")"));p_adjusted_italic
# assign nice colnames (for display)
my_corr_colnames = c("DUET"
, "Foldx"
, "Log (OR)"
, "-Log (P)"
, "OR (adjusted)"
, "-Log (P wald)"
, "AF"
, "ASA"
, "RSA"
, "KD"
, "RD"
, "duet_outcome"
, drug)
length(my_corr_colnames)
colnames(corr_data_ps)
colnames(corr_data_ps) <- my_corr_colnames
colnames(corr_data_ps)
#------------------------
# Data for ggcorr PS plot
#------------------------
start = 1
end_ggcorr = which(colnames(corr_data_ps) == "duet_outcome"); end_ggcorr # should be the last column
offset = 1
my_ggcorr_ps = corr_data_ps[start:(end_ggcorr-1)]
head(my_ggcorr_ps)
# correlation matrix
corr1 <- round(cor(my_ggcorr_ps, method = "spearman", use = "pairwise.complete.obs"), 1)
# p-value matrix
pmat1 <- cor_pmat(my_ggcorr_ps, method = "spearman", use = "pairwise.complete.obs")
corr2 = psych::corr.test(my_ggcorr_ps
, method = "spearman"
, use = "pairwise.complete.obs")$r
corr2 = round(corr2, 1)
pmat2 = psych::corr.test(my_ggcorr_ps
, method = "spearman"
, adjust = "none"
, use = "pairwise.complete.obs")$p
corr1== corr2
pmat1==pmat2
#------------------------
# Generate ggcorr PS plot
#------------------------
cat("ggCorr plot PS:", plot_ggcorr_all_ps)
svg(plot_ggcorr_all_ps, width = 15, height = 15)
ggcorr_ps = ggcorrplot(corr1
, p.mat = pmat1
, hc.order = TRUE
, outline.col = "black"
, ggtheme = ggplot2::theme_gray
, colors = c("#6D9EC1", "white", "#E46726")
, title = "Spearman correlations with DUET and Foldx stability")
ggcorr_ps
#dev.off()
#===========================
# Data for Correlation plots: LIG
#===========================
table(df_lig$ligand_outcome)
df_lig$log10_or_mychisq = log10(df_lig$or_mychisq)
df_lig$neglog_pval_fisher = -log10(df_lig$pval_fisher)
df_lig$log10_or_kin = log10(df_lig$or_kin)
df_lig$neglog_pwald_kin = -log10(df_lig$pwald_kin)
# subset data to generate pairwise correlations
cols_to_select_lig = c("affinity_scaled"
, "log10_or_mychisq"
, "neglog_pval_fisher"
, "or_kin"
, "neglog_pwald_kin"
, "af"
, "asa"
, "rsa"
, "kd_values"
, "rd_values"
, "ligand_outcome"
, drug)
corr_data_lig = df_lig[, cols_to_select_lig]
dim(corr_data_lig)
# assign nice colnames (for display)
my_corr_colnames_lig = c("Ligand Affinity"
, "Log (OR)"
, "-Log (P)"
, "OR (adjusted)"
, "-Log(P wald)"
, "AF"
, "ASA"
, "RSA"
, "KD"
, "RD"
, "ligand_outcome"
, drug)
length(my_corr_colnames)
colnames(corr_data_lig)
colnames(corr_data_lig) <- my_corr_colnames_lig
colnames(corr_data_lig)
#------------------------
# Data for ggcorr LIG plot
#------------------------
start = 1
end_ggcorr_lig = which(colnames(corr_data_lig) == "ligand_outcome"); end_ggcorr_lig # should be the last column
offset = 1
my_ggcorr_lig = corr_data_lig[start:(end_ggcorr_lig-1)]
head(my_ggcorr_lig); str(my_ggcorr_lig)
# correlation matrix
corr1_lig <- round(cor(my_ggcorr_lig, method = "spearman", use = "pairwise.complete.obs"), 1)
# p-value matrix
pmat1_lig <- cor_pmat(my_ggcorr_lig, method = "spearman", use = "pairwise.complete.obs")
corr2_lig = psych::corr.test(my_ggcorr_lig
, method = "spearman"
, use = "pairwise.complete.obs")$r
corr2_lig = round(corr2_lig, 1)
pmat2_lig = psych::corr.test(my_ggcorr_lig
, method = "spearman"
, adjust = "none"
, use = "pairwise.complete.obs")$p
corr1_lig == corr2_lig
pmat1_lig == pmat2_lig
# for display order columns by hc order of ps
#col_order = levels(ggcorr_ps$data[2])
#col_order <- c("Species", "Petal.Width", "Sepal.Length",
#"Sepal.Width", "Petal.Length")
#my_data2 <- my_data[, col_order]
#my_data2
#------------------------
# Generate ggcorr LIG plot
#------------------------
cat("ggCorr LIG plot:", plot_ggcorr_all_lig)
svg(plot_ggcorr_all_lig, width = 15, height = 15)
ggcorr_lig = ggcorrplot(corr1_lig
, p.mat = pmat1_lig
, hc.order = TRUE
, outline.col = "black"
, ggtheme = ggplot2::theme_gray
, colors = c("#6D9EC1", "white", "#E46726")
, title = "Spearman correlations with ligand affinty")
ggcorr_lig
#dev.off()
#######################################################
#=============================
# combine plots for output
#=============================
cat("Output plot:", plot_ggcorr_all_combined_labelled)
svg(plot_ggcorr_all_combined_labelled , width = 30, height = 10)
ggcorr_combined = cowplot::plot_grid(ggcorr_ps, ggcorr_lig
, nrow = 1
, align = "h"
, labels = c("(a)","(b)")
#, rel_heights = c(1, 1)
#, rel_widths = c(0.85, 1, 1)
, label_size = 25)
dev.off()