generated ggpairs plots finally

This commit is contained in:
Tanushree Tunstall 2022-08-15 19:05:22 +01:00
parent b68841b337
commit a3e5283a9b
11 changed files with 657 additions and 939 deletions

View file

@ -1,51 +1,88 @@
source("~/git/LSHTM_analysis/config/embb.R")
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
#source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
# NOTE(review): this region looks like a rendered commit diff whose +/- markers
# were stripped: superseded and replacement lines appear back to back (two
# `my_gg_pairs=function(` headers, two `justify_labels`, paired `size=` values,
# two `aes(` openers, two `title=` arguments). As written it will not parse as
# R -- confirm which line of each pair is current before running.
#
# my_gg_pairs: builds a ggpairs() plot matrix for `plot_df`.
#   plot_df      : data frame to plot; every column except the last is plotted
#                  (columns = 1:(ncol(plot_df)-1)) and a `dst_mode` column is
#                  read for the colour aesthetic.
#   plot_title   : passed to ggpairs() as `title` (no default value).
#   tt_args_size : `size` entry of title_args for the 'cor' panels (default 2.5).
#   gp_args_size : `size` entry of group_args for the 'cor' panels (default 2.5).
# Value: the ggpairs() object with the manual colour and fill scales added
# (it is the last expression in the function body).
my_gg_pairs=function(plot_df){
my_gg_pairs=function(plot_df, plot_title
, tt_args_size = 2.5
, gp_args_size = 2.5){
ggpairs(plot_df,
columns = 1:(ncol(plot_df)-1),
# Upper panels: Spearman correlation computed on pairwise-complete
# observations, titled with the Greek letter rho and shown with 2 digits.
upper = list(
continuous = wrap('cor',
continuous = wrap('cor', # ggally_cor()
method = "spearman",
use = "pairwise.complete.obs",
title="ρ",
digits=2,
justify_labels = "left",
title_args=c(colour="black")
justify_labels = "centre",
#title_args=c(colour="black"),
title_args=c(size=tt_args_size),#2.5
group_args=c(size=gp_args_size)#2.5
)
),
# Lower panels: "points" layers for continuous pairs and "dot" layers for
# combo pairs, both drawn with alpha 0.7.
lower = list(
continuous = wrap("points",
alpha = 0.7,
size=0.5),
size=0.125),
combo = wrap("dot",
alpha = 0.7,
size=0.5)
size=0.125)
),
# Colour by dst_mode recoded to labels: 0 -> "S", other non-missing values -> "R".
aes(colour = factor(ifelse(plot_df$dst_mode==0,
aes(colour = factor(ifelse(dst_mode==0,
"S",
"R") ),
alpha = 0.5),
title="Stability") +
title=plot_title) +
# Same two colours for outlines and fills.
# NOTE(review): which label gets "red" vs "blue" depends on the factor level
# order -- presumably alphabetical ("R" then "S"); confirm.
scale_colour_manual(values = c("red", "blue")) +
scale_fill_manual(values = c("red", "blue")) +
theme(text = element_text(size=12,
face="bold") )
scale_fill_manual(values = c("red", "blue")) #+
# theme(text = element_text(size=7,
# face="bold"))
}
# Cutoff compared against the "*-Dist" columns when masking affinity values
# below (units are not stated here -- presumably Angstroms; TODO confirm).
DistCutOff = 10
###########################################################################
# Lower-case gene names; matched against tolower(gene) to pick gene-specific
# affinity columns.
geneL_normal = c("pnca")
geneL_na = c("gid", "rpob")
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
# NOTE(review): merged_df3, gene, drug and corr_data_extract() are not defined
# in the visible text; presumably they come from the sourced scripts -- confirm.
merged_df3 = as.data.frame(merged_df3)
corr_plotdf = corr_data_extract(merged_df3
, gene = gene
, drug = drug
, extract_scaled_cols = F)
# Names of all extracted columns whose name contains "Dist".
aff_dist_cols = colnames(corr_plotdf)[grep("Dist", colnames(corr_plotdf))]
# NOTE(review): static_cols is assigned twice in a row, so the second value
# ("Log10(MAF)" only) is the one in effect. The first assignment is presumably
# the superseded line of a diff -- confirm.
static_cols = c("Log10(MAF)"
, "Log10(OR)")
static_cols = c("Log10(MAF)")
#, "Log10(OR)")
############################################################
#=============================================
# Creating masked df for affinity data
#=============================================
# Masking is done on a copy, so corr_plotdf keeps the original values.
corr_affinity_df = corr_plotdf
#----------------------
# Mask affinity columns
#-----------------------
# Rows whose Lig-Dist exceeds the cutoff get both ligand-affinity scores set to 0.
corr_affinity_df[corr_affinity_df["Lig-Dist"]>DistCutOff,"mCSM-lig"]=0
corr_affinity_df[corr_affinity_df["Lig-Dist"]>DistCutOff,"mmCSM-lig"]=0
# The PPI2 score is masked the same way, but only for genes in geneL_ppi2.
if (tolower(gene)%in%geneL_ppi2){
corr_affinity_df[corr_affinity_df["PPI-Dist"]>DistCutOff,"mCSM-PPI2"]=0
}
# if (tolower(gene)%in%geneL_na){
# corr_affinity_df[corr_affinity_df["NA-Dist"]>DistCutOff,"mCSM-NA"]=0
# }
# count 0
#res <- colSums(corr_affinity_df==0)/nrow(corr_affinity_df)*100
# Per column: number of rows minus the number of entries equal to 0.
# NOTE(review): values that were already 0 before masking are counted as
# masked, and colSums() is called without na.rm, so a column containing NA
# yields NA -- confirm this is acceptable.
unmasked_vals <- nrow(corr_affinity_df) - colSums(corr_affinity_df==0)
# Bare expression: prints the counts when evaluated at top level.
unmasked_vals
##########################################################
#================
# Stability
#================
# Columns for the stability pairs plot. "dst_mode" is listed last: my_gg_pairs()
# plots every column except the last and uses dst_mode for the colour aesthetic.
corr_ps_colnames = c(static_cols
, "DUET"
, "FoldX"
# NOTE(review): the next line is a diff hunk header, not R; some lines of the
# original column list are elided at this point.
@ -54,14 +91,13 @@ corr_ps_colnames = c(static_cols
, aff_dist_cols
, "dst_mode")
corr_df_ps = corr_plotdf[, corr_ps_colnames]
# NOTE(review): the column read here is `Log(OR)`, whereas the odds-ratio column
# named elsewhere in this file is "Log10(OR)" (and it is commented out of
# static_cols). If corr_df_ps has no `Log(OR)` column, `$` gives NULL and this
# evaluates to nrow(corr_df_ps) -- confirm the intended column.
complete_obs_ps = nrow(corr_df_ps) - sum(is.na(corr_df_ps$`Log(OR)`))
# Index of dst_mode and of the column before it; neither value is used again
# in the visible text.
color_coln = which(colnames(corr_df_ps) == "dst_mode")
corr_end = color_coln-1
# Plot #1
# NOTE(review): two consecutive assignments to the same name; the one-argument
# call omits plot_title, which has no default in the four-parameter
# my_gg_pairs() signature -- presumably the superseded diff line; confirm.
plot_corr_df_ps = my_gg_pairs(corr_df_ps)
plot_corr_df_ps = my_gg_pairs(corr_df_ps, plot_title="Stability features")
##########################################################
#================
# Conservation
#================
# Columns for the conservation pairs plot (list continues past the hunk header).
corr_conservation_cols = c( static_cols
, "ConSurf"
, "SNAP2"
# NOTE(review): diff hunk header below, not R; lines are elided here.
@ -71,74 +107,66 @@ corr_conservation_cols = c( static_cols
)
corr_df_cons = corr_plotdf[, corr_conservation_cols]
# NOTE(review): same `Log(OR)` vs "Log10(OR)" naming question as for the
# stability data frame -- confirm.
complete_obs_cons = nrow(corr_df_cons) - sum(is.na(corr_df_cons$`Log(OR)`))
color_coln = which(colnames(corr_df_cons) == "dst_mode")
corr_end = color_coln-1
# Plot #2
# NOTE(review): the titled call is followed by an untitled call that
# reassigns the same variable; if both lines are live, the later one is the
# value kept -- presumably it is the superseded diff line; confirm.
plot_corr_df_cons = my_gg_pairs(corr_df_cons, plot_title="Conservation features")
#my_gg_pairs(corr_df_cons)
plot_corr_df_cons = my_gg_pairs(corr_df_cons)
##########################################################
#================
# Affinity: lig, ppi and na as applicable
#================
#corr_df_lig = corr_plotdf[corr_plotdf["Lig-Dist"]<DistCutOff,]
# Ligand-affinity columns used for every gene; gene-specific columns are
# appended by the if-blocks below.
common_aff_colnames = c("mCSM-lig"
, "mmCSM-lig")
if (tolower(gene)%in%geneL_normal){
aff_colnames = common_aff_colnames
}
if (tolower(gene)%in%geneL_ppi2){
aff_colnames = c(common_aff_colnames, "mCSM-PPI2")
}
# NOTE(review): the corr_df_lig / corr_lig_colnames lines interleaved here
# look like the superseded side of the diff (ligand-only data frame) -- confirm.
corr_df_lig = corr_plotdf[corr_plotdf["Lig-Dist"]<DistCutOff,]
corr_lig_colnames = c(static_cols
, "mCSM-lig"
, "mmCSM-lig"
, "dst_mode")
# NOTE(review): "rpob" is in both geneL_ppi2 and geneL_na, so for that gene this
# later block overwrites aff_colnames and "mCSM-PPI2" is dropped. A gene in
# none of the three lists leaves aff_colnames unassigned by these blocks.
# Confirm both are intended.
if (tolower(gene)%in%geneL_na){
aff_colnames = c(common_aff_colnames, "mCSM-NA")
}
corr_df_lig = corr_plotdf[, corr_lig_colnames]
# building final affinity colnames for correlation
corr_aff_colnames = c(static_cols
, aff_colnames
, "dst_mode") # imp
# Columns are taken from the masked copy (corr_affinity_df), not corr_plotdf.
corr_df_aff = corr_affinity_df[, corr_aff_colnames]
# Bare expression: prints the selected column names at top level.
colnames(corr_df_aff)
# NOTE(review): `Log(OR)` is not among corr_lig_colnames as listed above; if
# the column is absent, `$` gives NULL and this equals nrow(corr_df_lig) -- confirm.
complete_obs_lig = nrow(corr_df_lig) - sum(is.na(corr_df_lig$`Log(OR)`))
color_coln = which(colnames(corr_df_lig) == "dst_mode")
corr_end = color_coln-1
# Plot #3
# Affinity plot uses larger correlation text (size 4) than the 2.5 default.
plot_corr_df_aff = my_gg_pairs(corr_df_aff, plot_title="Affinity features", tt_args_size = 4, gp_args_size =4)
#my_gg_pairs(corr_df_lig)
# NOTE(review): one-argument call without plot_title -- presumably the
# superseded diff line; confirm.
plot_corr_df_lig = my_gg_pairs(corr_df_lig)
#=============
# combine
#=============
# NOTE(review): the corr_df_ppi2 lines below look like the superseded side of
# the diff (separate PPI2 plot); the first assignment is immediately
# overwritten by the column selection a few lines later -- confirm.
corr_df_ppi2 = corr_plotdf[corr_plotdf["PPI-Dist"]<DistCutOff,]
corr_ppi2_colnames = c(static_cols
, "mCSM-PPI2"
, "dst_mode"
)
corr_df_ppi2 = corr_plotdf[, corr_ppi2_colnames]
complete_obs_ppi2 = nrow(corr_df_ppi2) - sum(is.na(corr_df_ppi2$`Log(OR)`))
color_coln = which(colnames(corr_df_ppi2) == "dst_mode")
corr_end = color_coln-1
#png("/home/tanu/tmp/gg_pairs_all.png", height = 6, width=11.75, unit="in",res=300)
# Opens a PNG device at <outdir_images><lower-case gene>_CorrAB.png,
# 6 x 11.75 inches at 300 dpi. outdir_images is not defined in the visible
# text -- presumably set by a sourced config script; confirm. paste0() adds no
# path separator, so outdir_images must already end with one.
png(paste0(outdir_images
,tolower(gene)
,"_CorrAB.png"), height = 6, width=11.75, unit="in",res=300)
# NOTE: DELETE LOG OR FROM CORRELATION PLOTS!!!!!
# NOTE: ALSO MAYBE DELETE DISTANCES AS WELL
# NOTE: http://ggobi.github.io/ggally/reference/ggally_cor.html
# "***" if the p-value is < 0.001
# "**" if the p-value is < 0.01
# "*" if the p-value is < 0.05
# "." if the p-value is < 0.10
# "" otherwise
#
# Plot #4
#my_gg_pairs(corr_df_ppi2)
plot_corr_df_ppi2 = my_gg_pairs(corr_df_ppi2)
# corr_df_na = corr_df_na[corr_df_na["NA-Dist"]<DistCutOff,]
# corr_na_colnames = c(static_cols
# , "mCSM-NA"
# , "dst_mode"
# )
#
# corr_df_na = corr_plotdf[, corr_na_colnames]
# complete_obs_na = nrow(corr_df_na) - sum(is.na(corr_df_na$`Log(OR)`))
# color_coln = which(colnames(corr_df_na) == "dst_mode")
# corr_end = color_coln-1
#
# # Plot #5
# #my_gg_pairs(corr_df_na)
# plot_corr_df_na = my_gg_pairs(corr_df_na)
# NOTE(review): a second png() is opened here while the device opened above is
# still open, and only one dev.off() follows the grid below. If both png()
# lines are live, one device is left open -- presumably this /tmp line is the
# superseded diff line; confirm.
png("/tmp/gg_pairs_all.png", height = 8, width=11.75, unit="in",res=300)
# NOTE(review): in `rel_heights = 7,7,3,3` only the first 7 is bound to
# rel_heights; the remaining values are passed as extra unnamed arguments.
# Presumably superseded by the plot_grid() call that follows -- confirm.
cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_ps),ggmatrix_gtable(plot_corr_df_cons),
ggmatrix_gtable(plot_corr_df_lig),ggmatrix_gtable(plot_corr_df_ppi2),
nrow=2, ncol=2, rel_heights = 7,7,3,3)
# Stability and conservation matrices side by side in one row, with
# automatically generated panel labels at size 12.
cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_ps),
ggmatrix_gtable(plot_corr_df_cons),
# ggmatrix_gtable(plot_corr_df_aff),
# nrow=1, ncol=3, rel_heights = 7,7,3
nrow=1,
#rel_heights = 1,1
labels = "AUTO",
label_size = 12)
# Closes the current graphics device.
dev.off()
# affinity corr
#png("/home/tanu/tmp/gg_pairs_affinity.png", height =7, width=7, unit="in",res=300)
# Affinity matrix goes to its own 7 x 7 inch, 300 dpi file
# (<outdir_images><lower-case gene>_CorrC.png) and is labelled "C".
png(paste0(outdir_images
,tolower(gene)
,"_CorrC.png"), height =7, width=7, unit="in",res=300)
cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_aff),
labels = "C",
label_size = 12)
dev.off()