added ggcorr plots combined for all params
This commit is contained in:
parent
24b1cc2440
commit
369c906a33
1 changed files with 389 additions and 0 deletions
389
scripts/plotting/ggcorr_all_PS_LIG.R
Normal file
389
scripts/plotting/ggcorr_all_PS_LIG.R
Normal file
|
@ -0,0 +1,389 @@
|
||||||
|
#!/usr/bin/env Rscript
|
||||||
|
#########################################################
|
||||||
|
# TASK: Corr plots for PS and Lig
|
||||||
|
|
||||||
|
# Output: 1 svg
|
||||||
|
|
||||||
|
#=======================================================================
|
||||||
|
# working dir and loading libraries
|
||||||
|
getwd()
|
||||||
|
setwd("~/git/LSHTM_analysis/scripts/plotting/")
|
||||||
|
getwd()
|
||||||
|
|
||||||
|
|
||||||
|
source("Header_TT.R")
|
||||||
|
require(cowplot)
|
||||||
|
source("combining_dfs_plotting.R")
|
||||||
|
source("my_pairs_panel.R")
|
||||||
|
# should return the following dfs, directories and variables
|
||||||
|
|
||||||
|
# PS combined:
|
||||||
|
# 1) merged_df2
|
||||||
|
# 2) merged_df2_comp
|
||||||
|
# 3) merged_df3
|
||||||
|
# 4) merged_df3_comp
|
||||||
|
|
||||||
|
# LIG combined:
|
||||||
|
# 5) merged_df2_lig
|
||||||
|
# 6) merged_df2_comp_lig
|
||||||
|
# 7) merged_df3_lig
|
||||||
|
# 8) merged_df3_comp_lig
|
||||||
|
|
||||||
|
# 9) my_df_u
|
||||||
|
# 10) my_df_u_lig
|
||||||
|
|
||||||
|
cat(paste0("Directories imported:"
|
||||||
|
, "\ndatadir:", datadir
|
||||||
|
, "\nindir:", indir
|
||||||
|
, "\noutdir:", outdir
|
||||||
|
, "\nplotdir:", plotdir))
|
||||||
|
|
||||||
|
cat(paste0("Variables imported:"
|
||||||
|
, "\ndrug:", drug
|
||||||
|
, "\ngene:", gene
|
||||||
|
, "\ngene_match:", gene_match
|
||||||
|
, "\nAngstrom symbol:", angstroms_symbol
|
||||||
|
, "\nNo. of duplicated muts:", dup_muts_nu
|
||||||
|
, "\nNA count for ORs:", na_count
|
||||||
|
, "\nNA count in df2:", na_count_df2
|
||||||
|
, "\nNA count in df3:", na_count_df3))
|
||||||
|
|
||||||
|
#=======
|
||||||
|
# output
|
||||||
|
#=======
|
||||||
|
# can't combine by cowplot because not ggplots
|
||||||
|
#corr_plot_combined = "corr_combined.svg"
|
||||||
|
#plot_corr_plot_combined = paste0(plotdir,"/", corr_plot_combined)
|
||||||
|
|
||||||
|
# PS
|
||||||
|
ggcorr_all_ps = "ggcorr_all_PS.svg"
|
||||||
|
plot_ggcorr_all_ps = paste0(plotdir,"/", ggcorr_all_ps)
|
||||||
|
|
||||||
|
# LIG
|
||||||
|
ggcorr_all_lig = "ggcorr_all_LIG.svg"
|
||||||
|
plot_ggcorr_all_lig = paste0(plotdir,"/", ggcorr_all_lig )
|
||||||
|
|
||||||
|
# combined
|
||||||
|
ggcorr_all_combined_labelled = "ggcorr_all_combined_labelled.svg"
|
||||||
|
plot_ggcorr_all_combined_labelled = paste0(plotdir,"/", ggcorr_all_combined_labelled)
|
||||||
|
|
||||||
|
####################################################################
|
||||||
|
# end of loading libraries and functions #
|
||||||
|
########################################################################
|
||||||
|
|
||||||
|
#%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
#df_ps = merged_df3_comp
|
||||||
|
#df_lig = merged_df3_comp_lig
|
||||||
|
|
||||||
|
df_ps = merged_df3
|
||||||
|
df_lig = merged_df3_lig
|
||||||
|
|
||||||
|
#%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
||||||
|
rm( merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig, my_df_u, my_df_u_lig)
|
||||||
|
|
||||||
|
########################################################################
|
||||||
|
# end of data extraction and cleaning for plots #
|
||||||
|
########################################################################
|
||||||
|
|
||||||
|
#===========================
|
||||||
|
# Data for Correlation plots:PS
|
||||||
|
#===========================
|
||||||
|
table(df_ps$duet_outcome)
|
||||||
|
|
||||||
|
|
||||||
|
#===========================
|
||||||
|
# Data for Correlation plots:foldx
|
||||||
|
#===========================
|
||||||
|
#============================
|
||||||
|
# adding foldx scaled values
|
||||||
|
# scale data b/w -1 and 1
|
||||||
|
#============================
|
||||||
|
n = which(colnames(df_ps) == "ddg"); n
|
||||||
|
|
||||||
|
my_min = min(df_ps[,n]); my_min
|
||||||
|
my_max = max(df_ps[,n]); my_max
|
||||||
|
|
||||||
|
df_ps$foldx_scaled = ifelse(df_ps[,n] < 0
|
||||||
|
, df_ps[,n]/abs(my_min)
|
||||||
|
, df_ps[,n]/my_max)
|
||||||
|
# sanity check
|
||||||
|
my_min = min(df_ps$foldx_scaled); my_min
|
||||||
|
my_max = max(df_ps$foldx_scaled); my_max
|
||||||
|
|
||||||
|
if (my_min == -1 && my_max == 1){
|
||||||
|
cat("PASS: foldx ddg successfully scaled b/w -1 and 1"
|
||||||
|
, "\nProceeding with assigning foldx outcome category")
|
||||||
|
}else{
|
||||||
|
cat("FAIL: could not scale foldx ddg values"
|
||||||
|
, "Aborting!")
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#================================
|
||||||
|
# adding foldx outcome category
|
||||||
|
# ddg<0 = "Stabilising" (-ve)
|
||||||
|
#=================================
|
||||||
|
|
||||||
|
c1 = table(df_ps$ddg < 0)
|
||||||
|
df_ps$foldx_outcome = ifelse(df_ps$ddg < 0, "Stabilising", "Destabilising")
|
||||||
|
c2 = table(df_ps$ddg < 0)
|
||||||
|
|
||||||
|
if ( all(c1 == c2) ){
|
||||||
|
cat("PASS: foldx outcome successfully created")
|
||||||
|
}else{
|
||||||
|
cat("FAIL: foldx outcome could not be created. Aborting!")
|
||||||
|
exit()
|
||||||
|
}
|
||||||
|
|
||||||
|
table(df_ps$foldx_outcome)
|
||||||
|
|
||||||
|
|
||||||
|
#======================
|
||||||
|
# adding log cols
|
||||||
|
#======================
|
||||||
|
|
||||||
|
df_ps$log10_or_mychisq = log10(df_ps$or_mychisq)
|
||||||
|
df_ps$neglog_pval_fisher = -log10(df_ps$pval_fisher)
|
||||||
|
|
||||||
|
df_ps$log10_or_kin = log10(df_ps$or_kin)
|
||||||
|
df_ps$neglog_pwald_kin = -log10(df_ps$pwald_kin)
|
||||||
|
|
||||||
|
# subset data to generate pairwise correlations
|
||||||
|
cols_to_select = c("duet_scaled"
|
||||||
|
|
||||||
|
, "foldx_scaled"
|
||||||
|
|
||||||
|
, "log10_or_mychisq"
|
||||||
|
, "neglog_pval_fisher"
|
||||||
|
|
||||||
|
, "or_kin"
|
||||||
|
, "neglog_pwald_kin"
|
||||||
|
|
||||||
|
, "af"
|
||||||
|
|
||||||
|
, "asa"
|
||||||
|
, "rsa"
|
||||||
|
, "kd_values"
|
||||||
|
, "rd_values"
|
||||||
|
|
||||||
|
, "duet_outcome"
|
||||||
|
, drug)
|
||||||
|
|
||||||
|
corr_data_ps = df_ps[, cols_to_select]
|
||||||
|
|
||||||
|
dim(corr_data_ps)
|
||||||
|
|
||||||
|
#p_italic = substitute(paste("-Log(", italic('P'), ")"));p_italic
|
||||||
|
#p_adjusted_italic = substitute(paste("-Log(", italic('P adjusted'), ")"));p_adjusted_italic
|
||||||
|
|
||||||
|
# assign nice colnames (for display)
|
||||||
|
my_corr_colnames = c("DUET"
|
||||||
|
|
||||||
|
, "Foldx"
|
||||||
|
|
||||||
|
, "Log (OR)"
|
||||||
|
, "-Log (P)"
|
||||||
|
|
||||||
|
, "OR (adjusted)"
|
||||||
|
, "-Log (P wald)"
|
||||||
|
|
||||||
|
, "AF"
|
||||||
|
|
||||||
|
, "ASA"
|
||||||
|
, "RSA"
|
||||||
|
, "KD"
|
||||||
|
, "RD"
|
||||||
|
|
||||||
|
, "duet_outcome"
|
||||||
|
, drug)
|
||||||
|
|
||||||
|
length(my_corr_colnames)
|
||||||
|
|
||||||
|
colnames(corr_data_ps)
|
||||||
|
colnames(corr_data_ps) <- my_corr_colnames
|
||||||
|
colnames(corr_data_ps)
|
||||||
|
|
||||||
|
#------------------------
|
||||||
|
# Data for ggcorr PS plot
|
||||||
|
#------------------------
|
||||||
|
start = 1
|
||||||
|
end_ggcorr = which(colnames(corr_data_ps) == "duet_outcome"); end_ggcorr # should be the last column
|
||||||
|
offset = 1
|
||||||
|
|
||||||
|
my_ggcorr_ps = corr_data_ps[start:(end_ggcorr-1)]
|
||||||
|
head(my_ggcorr_ps)
|
||||||
|
|
||||||
|
# correlation matrix
|
||||||
|
corr1 <- round(cor(my_ggcorr_ps, method = "spearman", use = "pairwise.complete.obs"), 1)
|
||||||
|
|
||||||
|
# p-value matrix
|
||||||
|
pmat1 <- cor_pmat(my_ggcorr_ps, method = "spearman", use = "pairwise.complete.obs")
|
||||||
|
|
||||||
|
corr2 = psych::corr.test(my_ggcorr_ps
|
||||||
|
, method = "spearman"
|
||||||
|
, use = "pairwise.complete.obs")$r
|
||||||
|
corr2 = round(corr2, 1)
|
||||||
|
|
||||||
|
pmat2 = psych::corr.test(my_ggcorr_ps
|
||||||
|
, method = "spearman"
|
||||||
|
, adjust = "none"
|
||||||
|
, use = "pairwise.complete.obs")$p
|
||||||
|
|
||||||
|
corr1== corr2
|
||||||
|
pmat1==pmat2
|
||||||
|
|
||||||
|
#------------------------
|
||||||
|
# Generate ggcorr PS plot
|
||||||
|
#------------------------
|
||||||
|
cat("ggCorr plot PS:", plot_ggcorr_all_ps)
|
||||||
|
svg(plot_ggcorr_all_ps, width = 15, height = 15)
|
||||||
|
|
||||||
|
ggcorr_ps = ggcorrplot(corr1
|
||||||
|
, p.mat = pmat1
|
||||||
|
, hc.order = TRUE
|
||||||
|
, outline.col = "black"
|
||||||
|
, ggtheme = ggplot2::theme_gray
|
||||||
|
, colors = c("#6D9EC1", "white", "#E46726")
|
||||||
|
, title = "Spearman correlations with DUET and Foldx stability")
|
||||||
|
|
||||||
|
|
||||||
|
ggcorr_ps
|
||||||
|
#dev.off()
|
||||||
|
|
||||||
|
#===========================
|
||||||
|
# Data for Correlation plots: LIG
|
||||||
|
#===========================
|
||||||
|
table(df_lig$ligand_outcome)
|
||||||
|
|
||||||
|
df_lig$log10_or_mychisq = log10(df_lig$or_mychisq)
|
||||||
|
df_lig$neglog_pval_fisher = -log10(df_lig$pval_fisher)
|
||||||
|
|
||||||
|
|
||||||
|
df_lig$log10_or_kin = log10(df_lig$or_kin)
|
||||||
|
df_lig$neglog_pwald_kin = -log10(df_lig$pwald_kin)
|
||||||
|
|
||||||
|
# subset data to generate pairwise correlations
|
||||||
|
cols_to_select_lig = c("affinity_scaled"
|
||||||
|
|
||||||
|
, "log10_or_mychisq"
|
||||||
|
, "neglog_pval_fisher"
|
||||||
|
|
||||||
|
, "or_kin"
|
||||||
|
, "neglog_pwald_kin"
|
||||||
|
|
||||||
|
, "af"
|
||||||
|
|
||||||
|
, "asa"
|
||||||
|
, "rsa"
|
||||||
|
, "kd_values"
|
||||||
|
, "rd_values"
|
||||||
|
|
||||||
|
, "ligand_outcome"
|
||||||
|
, drug)
|
||||||
|
|
||||||
|
corr_data_lig = df_lig[, cols_to_select_lig]
|
||||||
|
|
||||||
|
dim(corr_data_lig)
|
||||||
|
|
||||||
|
# assign nice colnames (for display)
|
||||||
|
my_corr_colnames_lig = c("Ligand Affinity"
|
||||||
|
|
||||||
|
, "Log (OR)"
|
||||||
|
, "-Log (P)"
|
||||||
|
|
||||||
|
, "OR (adjusted)"
|
||||||
|
, "-Log(P wald)"
|
||||||
|
|
||||||
|
, "AF"
|
||||||
|
|
||||||
|
, "ASA"
|
||||||
|
, "RSA"
|
||||||
|
, "KD"
|
||||||
|
, "RD"
|
||||||
|
|
||||||
|
, "ligand_outcome"
|
||||||
|
, drug)
|
||||||
|
|
||||||
|
length(my_corr_colnames)
|
||||||
|
|
||||||
|
colnames(corr_data_lig)
|
||||||
|
colnames(corr_data_lig) <- my_corr_colnames_lig
|
||||||
|
colnames(corr_data_lig)
|
||||||
|
|
||||||
|
#------------------------
|
||||||
|
# Data for ggcorr LIG plot
|
||||||
|
#------------------------
|
||||||
|
|
||||||
|
start = 1
|
||||||
|
end_ggcorr_lig = which(colnames(corr_data_lig) == "ligand_outcome"); end_ggcorr_lig # should be the last column
|
||||||
|
offset = 1
|
||||||
|
|
||||||
|
my_ggcorr_lig = corr_data_lig[start:(end_ggcorr_lig-1)]
|
||||||
|
head(my_ggcorr_lig); str(my_ggcorr_lig)
|
||||||
|
|
||||||
|
# correlation matrix
|
||||||
|
corr1_lig <- round(cor(my_ggcorr_lig, method = "spearman", use = "pairwise.complete.obs"), 1)
|
||||||
|
|
||||||
|
# p-value matrix
|
||||||
|
pmat1_lig <- cor_pmat(my_ggcorr_lig, method = "spearman", use = "pairwise.complete.obs")
|
||||||
|
|
||||||
|
corr2_lig = psych::corr.test(my_ggcorr_lig
|
||||||
|
, method = "spearman"
|
||||||
|
, use = "pairwise.complete.obs")$r
|
||||||
|
|
||||||
|
corr2_lig = round(corr2_lig, 1)
|
||||||
|
|
||||||
|
pmat2_lig = psych::corr.test(my_ggcorr_lig
|
||||||
|
, method = "spearman"
|
||||||
|
, adjust = "none"
|
||||||
|
, use = "pairwise.complete.obs")$p
|
||||||
|
|
||||||
|
corr1_lig == corr2_lig
|
||||||
|
pmat1_lig == pmat2_lig
|
||||||
|
|
||||||
|
|
||||||
|
# for display order columns by hc order of ps
|
||||||
|
|
||||||
|
#col_order = levels(ggcorr_ps$data[2])
|
||||||
|
|
||||||
|
#col_order <- c("Species", "Petal.Width", "Sepal.Length",
|
||||||
|
#"Sepal.Width", "Petal.Length")
|
||||||
|
#my_data2 <- my_data[, col_order]
|
||||||
|
#my_data2
|
||||||
|
|
||||||
|
#------------------------
|
||||||
|
# Generate ggcorr LIG plot
|
||||||
|
#------------------------
|
||||||
|
cat("ggCorr LIG plot:", plot_ggcorr_all_lig)
|
||||||
|
svg(plot_ggcorr_all_lig, width = 15, height = 15)
|
||||||
|
|
||||||
|
ggcorr_lig = ggcorrplot(corr1_lig
|
||||||
|
, p.mat = pmat1_lig
|
||||||
|
, hc.order = TRUE
|
||||||
|
, outline.col = "black"
|
||||||
|
|
||||||
|
, ggtheme = ggplot2::theme_gray
|
||||||
|
, colors = c("#6D9EC1", "white", "#E46726")
|
||||||
|
, title = "Spearman correlations with ligand affinty")
|
||||||
|
|
||||||
|
|
||||||
|
ggcorr_lig
|
||||||
|
#dev.off()
|
||||||
|
|
||||||
|
#######################################################
|
||||||
|
#=============================
|
||||||
|
# combine plots for output
|
||||||
|
#=============================
|
||||||
|
cat("Output plot:", plot_ggcorr_all_combined_labelled)
|
||||||
|
svg(plot_ggcorr_all_combined_labelled , width = 30, height = 10)
|
||||||
|
|
||||||
|
ggcorr_combined = cowplot::plot_grid(ggcorr_ps, ggcorr_lig
|
||||||
|
, nrow = 1
|
||||||
|
, align = "h"
|
||||||
|
, labels = c("(a)","(b)")
|
||||||
|
#, rel_heights = c(1, 1)
|
||||||
|
#, rel_widths = c(0.85, 1, 1)
|
||||||
|
, label_size = 25)
|
||||||
|
|
||||||
|
dev.off()
|
Loading…
Add table
Add a link
Reference in a new issue