This commit is contained in:
Tanushree Tunstall 2022-08-14 12:17:36 +01:00
parent c09d7530c9
commit 6f354ab390
10 changed files with 833 additions and 285 deletions

View file

@ -0,0 +1,366 @@
#!/usr/bin/env Rscript
#source("~/git/LSHTM_analysis/config/alr.R")
source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/config/katg.R")
#source("~/git/LSHTM_analysis/config/gid.R")
#source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/rpob.R")
# get plottting dfs
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
####################################################
# ggpairs wrapper
my_gg_pairs=function(plot_df){
ggpairs(plot_df, columns = 1:(ncol(plot_df)-1),
upper = list(continuous = wrap('cor',
method = "spearman",
title="ρ",
digits=2,
title_args=c(colour="black")
)
),
lower = list(
continuous = wrap("points", alpha = 0.7, size=0.5),
combo = wrap("dot", alpha = 0.7, size=0.5)
),
aes(colour = factor(ifelse(plot_df$dst_mode==0, "S", "R")), alpha = 0.5),
title="Stability") +
scale_colour_manual(values = c("red", "blue")) +
scale_fill_manual(values = c("red", "blue")) +
theme(
text = element_text(size=12, face="bold")
)
}
#=======
# output
#=======
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
#=======
# Input
#=======
merged_df3 = as.data.frame(merged_df3)
corr_plotdf = corr_data_extract(merged_df3
, gene = gene
, drug = drug
, extract_scaled_cols = F)
colnames(corr_plotdf)
if (all(colnames(corr_df_m3_f) == colnames(corr_plotdf))){
cat("PASS: corr plot colnames match for dashboard")
}else{
stop("Abort: corr plot colnames DO NOT match for dashboard")
}
#corr_plotdf = corr_df_m3_f #for downstream code
aff_dist_cols = colnames(corr_plotdf)[grep("Dist", colnames(corr_plotdf))]
aff_dist_cols
static_cols = c("Log10(MAF)"
, "Log10(OR)"
#, "-Log10(P)"
)
#================
# stability
#================
#affinity_dist_colnames# lIg DIst and ppi Di
corr_ps_colnames = c(static_cols
, "DUET"
, "FoldX"
, "DeepDDG"
, "Dynamut2"
, aff_dist_cols
, "dst_mode")
if (all(corr_ps_colnames%in%colnames(corr_plotdf))){
cat("PASS: all colnames exist for correlation")
}else{
stop("Abort: all colnames DO NOT exist for correlation")
}
corr_df_ps = corr_plotdf[, corr_ps_colnames]
complete_obs_ps = nrow(corr_df_ps) - sum(is.na(corr_df_ps$`Log(OR)`))
cat("\nComplete muts for Conservation for", gene, ":", complete_obs_ps)
color_coln = which(colnames(corr_df_ps) == "dst_mode")
#end = which(colnames(corr_df_ps) == drug)
#ncol_omit = 2
#corr_end = end-ncol_omit
corr_end = color_coln-1
#------------------------
# Output: stability corrP
#------------------------
corr_psP = paste0(outdir_images
,tolower(gene)
,"_corr_stability.svg" )
cat("Corr plot stability with coloured dots:", corr_psP)
svg(corr_psP, width = 15, height = 15)
my_corr_pairs(corr_data_all = corr_df_ps
, corr_cols = colnames(corr_df_ps[1:corr_end])
, corr_method = "spearman"
, colour_categ_col = colnames(corr_df_ps[color_coln]) #"dst_mode"
, categ_colour = c("red", "blue")
, density_show = F
, hist_col = "coral4"
, dot_size = 1.6
, ats = 1.5
, corr_lab_size =2.5
, corr_value_size = 1)
dev.off()
#===============
# CONSERVATION
#==============
corr_conservation_cols = c( static_cols
, "ConSurf"
, "SNAP2"
, "PROVEAN"
, aff_dist_cols
, "dst_mode"
, drug)
if (all(corr_conservation_cols%in%colnames(corr_plotdf))){
cat("PASS: all colnames exist for ConSurf-correlation")
}else{
stop("Abort: all colnames DO NOT exist for ConSurf-correlation")
}
corr_df_cons = corr_plotdf[, corr_conservation_cols]
complete_obs_cons = nrow(corr_df_cons) - sum(is.na(corr_df_cons$`Log(OR)`))
cat("\nComplete muts for Conservation for", gene, ":", complete_obs_cons)
color_coln = which(colnames(corr_df_cons) == "dst_mode")
# end = which(colnames(corr_df_cons) == drug)
# ncol_omit = 2
# corr_end = end-ncol_omit
corr_end = color_coln-1
#---------------------------
# Output: Conservation corrP
#----------------------------
corr_consP = paste0(outdir_images
,tolower(gene)
,"_corr_conservation.svg" )
cat("Corr plot conservation coloured dots:", corr_consP)
svg(corr_consP, width = 10, height = 10)
my_corr_pairs(corr_data_all = corr_df_cons
, corr_cols = colnames(corr_df_cons[1:corr_end])
, corr_method = "spearman"
, colour_categ_col = colnames(corr_df_cons[color_coln]) #"dst_mode"
, categ_colour = c("red", "blue")
, density_show = F
, hist_col = "coral4"
, dot_size =1.1
, ats = 1.5
, corr_lab_size = 1.8
, corr_value_size = 1)
dev.off()
#####################################################
#DistCutOff = 10
#LigDist_colname # = "ligand_distance" # from globals
#ppi2Dist_colname = "interface_dist"
#naDist_colname = "TBC"
#####################################################
#================
# ligand affinity
#================
corr_df_lig = corr_plotdf[corr_plotdf["Lig-Dist"]<DistCutOff,]
corr_lig_colnames = c(static_cols
, "mCSM-lig"
, "mmCSM-lig"
, "dst_mode")
#, drug)
if (all(corr_lig_colnames%in%colnames(corr_plotdf))){
cat("PASS: all colnames exist for Lig-correlation")
}else{
stop("Abort: all colnames DO NOT exist for Lig-correlation")
}
corr_df_lig = corr_plotdf[, corr_lig_colnames]
complete_obs_lig = nrow(corr_df_lig) - sum(is.na(corr_df_lig$`Log(OR)`))
cat("\nComplete muts for lig affinity for", gene, ":", complete_obs_lig)
color_coln = which(colnames(corr_df_lig) == "dst_mode")
# end = which(colnames(corr_df_lig) == drug)
# ncol_omit = 2
# corr_end = end-ncol_omit
corr_end = color_coln-1
#------------------------
# Output: ligand corrP
#------------------------
corr_ligP = paste0(outdir_images
,tolower(gene)
,"_corr_lig.svg" )
cat("Corr plot affinity with coloured dots:", corr_ligP)
svg(corr_ligP, width = 10, height = 10)
my_corr_pairs(corr_data_all = corr_df_lig
, corr_cols = colnames(corr_df_lig[1:corr_end])
, corr_method = "spearman"
, colour_categ_col = colnames(corr_df_lig[color_coln]) #"dst_mode"
, categ_colour = c("red", "blue")
, density_show = F
, hist_col = "coral4"
, dot_size = 2
, ats = 1.5
, corr_lab_size =3
, corr_value_size = 1)
dev.off()
####################################################
#================
# ppi2 affinity
#================
if (tolower(gene)%in%geneL_ppi2){
corr_df_ppi2 = corr_plotdf[corr_plotdf["PPI-Dist"]<DistCutOff,]
corr_ppi2_colnames = c(static_cols
, "mCSM-PPI2"
, "dst_mode"
, drug)
if (all(corr_ppi2_colnames%in%colnames(corr_plotdf))){
cat("PASS: all colnames exist for mcsm-ppi2 correlation")
}else{
stop("Abort: all colnames DO NOT exist for mcsm-ppi2 correlation")
}
corr_df_ppi2 = corr_plotdf[, corr_ppi2_colnames]
complete_obs_ppi2 = nrow(corr_df_ppi2) - sum(is.na(corr_df_ppi2$`Log(OR)`))
cat("\nComplete muts for ppi2 affinity for", gene, ":", complete_obs_ppi2)
color_coln = which(colnames(corr_df_ppi2) == "dst_mode")
# end = which(colnames(corr_df_ppi2) == drug)
# ncol_omit = 2
# corr_end = end-ncol_omit
corr_end = color_coln-1
#------------------------
# Output: ppi2 corrP
#------------------------
corr_ppi2P = paste0(outdir_images
,tolower(gene)
,"_corr_ppi2.svg" )
cat("Corr plot ppi2 with coloured dots:", corr_ppi2P)
svg(corr_ppi2P, width = 10, height = 10)
my_corr_pairs(corr_data_all = corr_df_ppi2
, corr_cols = colnames(corr_df_ppi2[1:corr_end])
, corr_method = "spearman"
, colour_categ_col = colnames(corr_df_ppi2[color_coln]) #"dst_mode"
, categ_colour = c("red", "blue")
, density_show = F
, hist_col = "coral4"
, dot_size = 2
, ats = 1.5
, corr_lab_size = 3
, corr_value_size = 1)
dev.off()
}
# FIXME: ADD distance
#==================
# mCSSM-NA affinity
#==================
#================
# NA affinity
#================
if (tolower(gene)%in%geneL_na){
corr_df_na = corr_df_na[corr_df_na["NA-Dist"]<DistCutOff,]
corr_na_colnames = c(static_cols
, "mCSM-NA"
, "dst_mode"
, drug)
if (all(corr_na_colnames%in%colnames(corr_plotdf))){
cat("PASS: all colnames exist for mcsm-NA-correlation")
}else{
stop("Abort: all colnames DO NOT exist for mcsm-NA-correlation")
}
corr_na_colnames%in%colnames(corr_plotdf)
corr_df_na = corr_plotdf[, corr_na_colnames]
complete_obs_na = nrow(corr_df_na) - sum(is.na(corr_df_na$`Log(OR)`))
cat("\nComplete muts for NA affinity for", gene, ":", complete_obs_na)
color_coln = which(colnames(corr_df_na) == "dst_mode")
# end = which(colnames(corr_df_na) == drug)
# ncol_omit = 2
# corr_end = end-ncol_omit
corr_end = color_coln-1
#------------------------
# Output: mCSM-NA corrP
#------------------------
corr_naP = paste0(outdir_images
,tolower(gene)
,"_corr_na.svg" )
cat("Corr plot mCSM-NA with coloured dots:", corr_naP)
svg(corr_naP, width = 10, height = 10)
my_corr_pairs(corr_data_all = corr_df_na
, corr_cols = colnames(corr_df_na[1:corr_end])
, corr_method = "spearman"
, colour_categ_col = colnames(corr_df_na[color_coln]) #"dst_mode"
, categ_colour = c("red", "blue")
, density_show = F
, hist_col = "coral4"
, dot_size = 2
, ats = 1.5
, corr_lab_size = 3
, corr_value_size = 1)
dev.off()
}
####################################################
#===============
#ggpairs:
#================
#corr_df_ps$dst_mode = ifelse(corr_df_cons$dst_mode=="1", "R", "S")
corr_plotting_df = corr_df_ps
svg('~/tmp/foo.svg',
width=10,
height=10,
units="in",
res=300)
my_gg_pairs(corr_plotting_df)
dev.off()
png('~/tmp/foo.png',
width=10,
height=10,
units="in",
res=300)
my_gg_pairs(corr_plotting_df)
dev.off()
#

View file

@ -0,0 +1,49 @@
# Tweak for layout, fonts, and text sizes.
#svg('~/tmp/foo.svg', width=10, height=10, )
# Set the width/height to inches for print. 300 dpi is reasonably ok for "draft"
# output. To raise quality while preserving sanity, increase 'res' and
# DO NOT alter font/point/line sizes
#- [X] Black text for "Corr:" or replace with Rho symbol
#- [X] 0/1 == R/S
#- [X] "rho" symbol instead of "Corr:" text
#- [X] Dot size a bit smaller
#- [X] Plot lines slightly thinner
#
#
png('~/tmp/foo.png',
width=10,
height=10,
units="in",
res=300)
#
corr_plotting_df = corr_df_ps
ggpairs(corr_plotting_df, columns = 1:(ncol(corr_plotting_df)-1),
upper = list(continuous = wrap('cor',
method = "spearman",
title="ρ",
digits=2,
title_args=c(colour="black")
)
),
lower = list(
continuous = wrap("points", alpha = 0.7, size=0.5),
combo = wrap("dot", alpha = 0.7, size=0.5)
),
aes(colour = factor(ifelse(corr_plotting_df$dst_mode==0, "S", "R")), alpha = 0.5),
title="Stability") +
scale_colour_manual(values = c("red", "blue")) +
scale_fill_manual(values = c("red", "blue")) +
theme(
text = element_text(size=12, face="bold")
)
dev.off()
#Check all plots with LSHTM_analysis/scripts/plotting/plotting_colnames.R

View file

@ -0,0 +1,144 @@
source("~/git/LSHTM_analysis/config/embb.R")
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
my_gg_pairs=function(plot_df){
ggpairs(plot_df,
columns = 1:(ncol(plot_df)-1),
upper = list(
continuous = wrap('cor',
method = "spearman",
title="ρ",
digits=2,
justify_labels = "left",
title_args=c(colour="black")
)
),
lower = list(
continuous = wrap("points",
alpha = 0.7,
size=0.5),
combo = wrap("dot",
alpha = 0.7,
size=0.5)
),
aes(colour = factor(ifelse(plot_df$dst_mode==0,
"S",
"R") ),
alpha = 0.5),
title="Stability") +
scale_colour_manual(values = c("red", "blue")) +
scale_fill_manual(values = c("red", "blue")) +
theme(text = element_text(size=12,
face="bold") )
}
DistCutOff = 10
merged_df3 = as.data.frame(merged_df3)
corr_plotdf = corr_data_extract(merged_df3
, gene = gene
, drug = drug
, extract_scaled_cols = F)
aff_dist_cols = colnames(corr_plotdf)[grep("Dist", colnames(corr_plotdf))]
static_cols = c("Log10(MAF)"
, "Log10(OR)")
corr_ps_colnames = c(static_cols
, "DUET"
, "FoldX"
, "DeepDDG"
, "Dynamut2"
, aff_dist_cols
, "dst_mode")
corr_df_ps = corr_plotdf[, corr_ps_colnames]
complete_obs_ps = nrow(corr_df_ps) - sum(is.na(corr_df_ps$`Log(OR)`))
color_coln = which(colnames(corr_df_ps) == "dst_mode")
corr_end = color_coln-1
# Plot #1
plot_corr_df_ps = my_gg_pairs(corr_df_ps)
corr_conservation_cols = c( static_cols
, "ConSurf"
, "SNAP2"
, "PROVEAN"
, aff_dist_cols
, "dst_mode"
)
corr_df_cons = corr_plotdf[, corr_conservation_cols]
complete_obs_cons = nrow(corr_df_cons) - sum(is.na(corr_df_cons$`Log(OR)`))
color_coln = which(colnames(corr_df_cons) == "dst_mode")
corr_end = color_coln-1
# Plot #2
#my_gg_pairs(corr_df_cons)
plot_corr_df_cons = my_gg_pairs(corr_df_cons)
corr_df_lig = corr_plotdf[corr_plotdf["Lig-Dist"]<DistCutOff,]
corr_lig_colnames = c(static_cols
, "mCSM-lig"
, "mmCSM-lig"
, "dst_mode")
corr_df_lig = corr_plotdf[, corr_lig_colnames]
complete_obs_lig = nrow(corr_df_lig) - sum(is.na(corr_df_lig$`Log(OR)`))
color_coln = which(colnames(corr_df_lig) == "dst_mode")
corr_end = color_coln-1
# Plot #3
#my_gg_pairs(corr_df_lig)
plot_corr_df_lig = my_gg_pairs(corr_df_lig)
corr_df_ppi2 = corr_plotdf[corr_plotdf["PPI-Dist"]<DistCutOff,]
corr_ppi2_colnames = c(static_cols
, "mCSM-PPI2"
, "dst_mode"
)
corr_df_ppi2 = corr_plotdf[, corr_ppi2_colnames]
complete_obs_ppi2 = nrow(corr_df_ppi2) - sum(is.na(corr_df_ppi2$`Log(OR)`))
color_coln = which(colnames(corr_df_ppi2) == "dst_mode")
corr_end = color_coln-1
# NOTE: DELETE LOG OR FROM CORRELATION PLOTS!!!!!
# NOTE: ALSO MAYBE DELETE DISTANCES AS WELL
# NOTE: http://ggobi.github.io/ggally/reference/ggally_cor.html
# "***" if the p-value is < 0.001
# "**" if the p-value is < 0.01
# "*" if the p-value is < 0.05
# "." if the p-value is < 0.10
# "" otherwise
#
# Plot #4
#my_gg_pairs(corr_df_ppi2)
plot_corr_df_ppi2 = my_gg_pairs(corr_df_ppi2)
# corr_df_na = corr_df_na[corr_df_na["NA-Dist"]<DistCutOff,]
# corr_na_colnames = c(static_cols
# , "mCSM-NA"
# , "dst_mode"
# )
#
# corr_df_na = corr_plotdf[, corr_na_colnames]
# complete_obs_na = nrow(corr_df_na) - sum(is.na(corr_df_na$`Log(OR)`))
# color_coln = which(colnames(corr_df_na) == "dst_mode")
# corr_end = color_coln-1
#
# # Plot #5
# #my_gg_pairs(corr_df_na)
# plot_corr_df_na = my_gg_pairs(corr_df_na)
cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_ps),ggmatrix_gtable(plot_corr_df_cons),
ggmatrix_gtable(plot_corr_df_lig),ggmatrix_gtable(plot_corr_df_ppi2),
nrow=2, ncol=2, rel_heights = 7,7,3,3)