added pnca plots

This commit is contained in:
Tanushree Tunstall 2022-09-06 21:36:28 +01:00
parent ade1739753
commit 590cec5e99
3 changed files with 704 additions and 0 deletions

View file

@ -0,0 +1,210 @@
source("~/git/LSHTM_analysis/config/pnca.R")
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
#=======
# output
#=======
# Directory that receives the figures written further down. The trailing "/"
# matters: file names are appended to it with paste0().
# NOTE(review): `gene` is not defined in this file; presumably it comes from the
# sourced config script - confirm.
outdir_images <- paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
# Trailing newline so the message does not run into the next console output.
cat("plots will output to:", outdir_images, "\n")
# Pairwise scatter/correlation matrix built with ggpairs(), coloured by the
# "dst_mode" column.
#
# Args:
#   plot_df:      data holding the columns to compare followed by a LAST column
#                 named "dst_mode"; the last column is left out of the matrix
#                 and is only used for colouring.
#   plot_title:   title passed on to ggpairs().
#   tt_args_size: text size of the overall correlation label (title_args).
#   gp_args_size: text size of the per-group correlation labels (group_args).
#
# Returns: the ggpairs() object with manual colour and fill scales added.
my_gg_pairs <- function(plot_df, plot_title
                        , tt_args_size = 2.5
                        , gp_args_size = 2.5) {
  # Fail early with a readable message instead of deep inside plot building.
  if (is.null(ncol(plot_df)) || ncol(plot_df) < 2) {
    stop("plot_df needs at least one measure column plus 'dst_mode'",
         call. = FALSE)
  }
  if (!"dst_mode" %in% colnames(plot_df)) {
    stop("plot_df must contain a 'dst_mode' column", call. = FALSE)
  }

  # Every column except the last one; seq_len() never counts backwards the way
  # 1:(n - 1) does when n - 1 is 0.
  measure_cols <- seq_len(ncol(plot_df) - 1)

  # dst_mode == 0 is labelled "S", anything else "R". factor() sorts its levels,
  # so when both classes are present "R" takes the first manual colour (red)
  # and "S" the second (blue).
  ggpairs(plot_df,
          mapping = aes(colour = factor(ifelse(dst_mode == 0,
                                               "S",
                                               "R")),
                        alpha = 0.5),
          columns = measure_cols,
          upper = list(
            continuous = wrap("cor", # ggally_cor()
                              method = "spearman",
                              use = "pairwise.complete.obs",
                              title = "ρ",
                              digits = 2,
                              justify_labels = "centre",
                              title_args = list(size = tt_args_size, colour = "black"), # 2.5
                              group_args = list(size = gp_args_size) # 2.5
            )
          ),
          lower = list(
            continuous = wrap("points",
                              alpha = 0.7,
                              size = 0.125),
            combo = wrap("dot",
                         alpha = 0.7,
                         size = 0.125)
          ),
          title = plot_title) +
    scale_colour_manual(values = c("red", "blue")) +
    scale_fill_manual(values = c("red", "blue")) #+
  # theme(text = element_text(size=7,
  #                           face="bold"))
}
# Threshold compared against the "*-Dist" columns when affinity values are
# masked further down (units are not stated here; presumably Angstrom - confirm).
DistCutOff <- 10
###########################################################################
# Gene groups used below to decide which affinity/distance columns apply.
geneL_normal <- "pnca"
geneL_na <- c("gid", "rpob")
geneL_ppi2 <- c("alr", "embb", "katg", "rpob")

# Plain data.frame, then hand over to corr_data_extract() (defined outside
# this file) to obtain the columns used for the correlation plots.
merged_df3 <- as.data.frame(merged_df3)
corr_plotdf <- corr_data_extract(merged_df3,
                                 gene = gene,
                                 drug = drug,
                                 extract_scaled_cols = FALSE)

# Every extracted column whose name contains "Dist".
aff_dist_cols <- grep("Dist", colnames(corr_plotdf), value = TRUE)

# Columns included in each of the correlation matrices below.
static_cols <- c(
  "Log10(MAF)"
  # , "Log10(OR)"
)
############################################################
#=============================================
# Creating masked df for affinity data
#=============================================
# Work on a copy so corr_plotdf keeps the unmasked values.
corr_affinity_df <- corr_plotdf
#----------------------
# Mask affinity columns
#-----------------------
# An affinity value is set to 0 when the matching distance is greater than
# DistCutOff. Assigning into a data frame with a logical row index that
# contains NA stops with "missing values are not allowed in subscripted
# assignments of data frames", so NA comparisons are turned into FALSE first:
# rows with an unknown distance are left as they are.
beyond_lig <- corr_affinity_df["Lig-Dist"] > DistCutOff
beyond_lig[is.na(beyond_lig)] <- FALSE
corr_affinity_df[beyond_lig, "mCSM-lig"] <- 0
corr_affinity_df[beyond_lig, "mmCSM-lig"] <- 0

if (tolower(gene) %in% geneL_ppi2) {
  beyond_ppi <- corr_affinity_df["PPI-Dist"] > DistCutOff
  beyond_ppi[is.na(beyond_ppi)] <- FALSE
  corr_affinity_df[beyond_ppi, "mCSM-PPI2"] <- 0
}

if (tolower(gene) %in% geneL_na) {
  beyond_na <- corr_affinity_df["NA-Dist"] > DistCutOff
  beyond_na[is.na(beyond_na)] <- FALSE
  corr_affinity_df[beyond_na, "mCSM-NA"] <- 0
}

# count 0
#res <- colSums(corr_affinity_df==0)/nrow(corr_affinity_df)*100
# Per column: number of rows whose value is not 0. colSums() is called without
# na.rm, so a column containing NA reports NA here.
unmasked_vals <- nrow(corr_affinity_df) - colSums(corr_affinity_df == 0)
unmasked_vals
##########################################################
#================
# Stability
#================
# Static columns, the four stability estimates, every distance column, and
# dst_mode last (my_gg_pairs() leaves the last column out of the matrix).
corr_ps_colnames <- c(
  static_cols,
  "mCSM-DUET",
  "FoldX",
  "DeepDDG",
  "Dynamut2",
  aff_dist_cols,
  "dst_mode"
)
corr_df_ps <- corr_plotdf[, corr_ps_colnames]

# Plot #1
plot_corr_df_ps <- my_gg_pairs(corr_df_ps, plot_title = "Stability estimates")

##########################################################
#================
# Conservation
#================
# Same layout for the conservation scores; distance columns are not included.
corr_conservation_cols <- c(
  static_cols,
  "ConSurf",
  "SNAP2",
  "PROVEAN",
  # aff_dist_cols,
  "dst_mode"
)
corr_df_cons <- corr_plotdf[, corr_conservation_cols]

# Plot #2
plot_corr_df_cons <- my_gg_pairs(corr_df_cons, plot_title = "Conservation estimates")
##########################################################
#================
# Affinity: lig, ppi and na as applicable
#================
#corr_df_lig = corr_plotdf[corr_plotdf["Lig-Dist"]<DistCutOff,]
common_aff_colnames <- c("mCSM-lig",
                         "mmCSM-lig")

# Start from the ligand columns every gene has and append the gene-specific
# ones. Appending (instead of re-assigning in each branch) keeps "mCSM-PPI2"
# for a gene listed in both geneL_ppi2 and geneL_na, and leaves aff_colnames
# defined for a gene that is in neither list.
aff_colnames <- common_aff_colnames
if (tolower(gene) %in% geneL_ppi2) {
  aff_colnames <- c(aff_colnames, "mCSM-PPI2")
}
if (tolower(gene) %in% geneL_na) {
  aff_colnames <- c(aff_colnames, "mCSM-NA")
}

# building final affinity colnames for correlation
corr_aff_colnames <- c(static_cols,
                       aff_colnames,
                       "dst_mode") # imp: must stay last, my_gg_pairs() drops the last column from the matrix

# Taken from the masked copy, not from corr_plotdf.
corr_df_aff <- corr_affinity_df[, corr_aff_colnames]
colnames(corr_df_aff)

# Plot #3
plot_corr_df_aff <- my_gg_pairs(corr_df_aff,
                                plot_title = "Affinity estimates"
                                #, tt_args_size = 4
                                #, gp_args_size = 4
)
#### Combine plots #####
# #png("/home/tanu/tmp/gg_pairs_all.png", height = 6, width=11.75, unit="in",res=300)
# png(paste0(outdir_images
# ,tolower(gene)
# ,"_CorrAB.png"), height = 6, width=11.75, unit="in",res=300)
#
# cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_ps),
# ggmatrix_gtable(plot_corr_df_cons),
# # ggmatrix_gtable(plot_corr_df_aff),
# # nrow=1, ncol=3, rel_heights = 7,7,3
# nrow=1,
# #rel_heights = 1,1
# labels = "AUTO",
# label_size = 12)
# dev.off()
#
# # affinity corr
# #png("/home/tanu/tmp/gg_pairs_affinity.png", height =7, width=7, unit="in",res=300)
# png(paste0(outdir_images
# ,tolower(gene)
# ,"_CorrC.png"), height =7, width=7, unit="in",res=300)
#
# cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_aff),
# labels = "C",
# label_size = 12)
# dev.off()
#### Combine A ####
# The grid is wrapped in print() so it is drawn onto the open device even when
# this file is run through source(), which does not auto-print top-level
# values; without it the PNG would be written empty in that case.
png(paste0(outdir_images,
           tolower(gene),
           "_CorrA.png"),
    height = 8, width = 8, units = "in", res = 300)
print(cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_ps),
                         labels = "A",
                         label_size = 12))
dev.off()

#### Combine B+C ####
# B + C
png(paste0(outdir_images,
           tolower(gene),
           "_CorrBC.png"),
    height = 6, width = 11.75, units = "in", res = 300)
print(cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_cons),
                         ggmatrix_gtable(plot_corr_df_aff),
                         # ggmatrix_gtable(plot_corr_df_aff),
                         # nrow=1, ncol=3, rel_heights = 7,7,3
                         nrow = 1,
                         #rel_heights = 1,1
                         labels = c("B", "C"),
                         label_size = 12))
dev.off()

View file

@ -0,0 +1,318 @@
#################
# Numbers
##################
# dm_om_wf_lf_data() is defined outside this file; the list it returns is read
# below by element name ("lf_*" / "wf_*").
all_dm_om_df <- dm_om_wf_lf_data(df = merged_df3, gene = gene)
#
# lf_duet = all_dm_om_df[['lf_duet']]
# table(lf_duet$param_type)
################################################################
#======================
# Data: Dist+Genomics
#======================
lf_dist_genP <- all_dm_om_df[["lf_dist_gen"]]
wf_dist_genP <- all_dm_om_df[["wf_dist_gen"]]

levels(lf_dist_genP$param_type)
#lf_dist_genP$param_type <- factor(lf_dist_genP$param_type, levels=c("Log10(MAF)", "Lig Dist(Å)", "PPI Dist(Å)"))
table(lf_dist_genP$param_type)

# param_type values treated as genomics measures; everything else in
# lf_dist_genP is handled as a distance further down.
genomics_param <- "Log10(MAF)"

# All parameters of this frame in one plot (built, not displayed).
dist_genP <- lf_bp2(lf_dist_genP,
                    # p_title,
                    violin_quantiles = 0.5,
                    monochrome = FALSE)
#dist_genP

#-------------------
# Genomics data plot
#-------------------
keep_genomics <- lf_dist_genP$param_type %in% genomics_param
genomics_dataP <- lf_dist_genP[keep_genomics, ]
# factor() again so levels that were filtered out are dropped
genomics_dataP$param_type <- factor(genomics_dataP$param_type)
table(genomics_dataP$param_type)

genomicsP <- lf_bp2(genomics_dataP,
                    # p_title = "",
                    dot_transparency = 0.3, # 0.3 default
                    violin_quantiles = 0.5,
                    monochrome = FALSE)
genomicsP

# #check
# wilcox.test(wf_dist_genP$`Log10(MAF)`[wf_dist_genP$mutation_info_labels=="R"]
#             , wf_dist_genP$`Log10(MAF)`[wf_dist_genP$mutation_info_labels=="S"], paired = FALSE)
#
# tapply(wf_dist_genP$`Log10(MAF)`, wf_dist_genP$mutation_info_labels, summary)
#-------------------
# Distance data plot:
#--------------------
# Everything in lf_dist_genP that is not a genomics parameter.
is_genomics <- lf_dist_genP$param_type %in% genomics_param
dist_dataP <- lf_dist_genP[!is_genomics, ]
dist_dataP$param_type <- factor(dist_dataP$param_type)
table(dist_dataP$param_type)
levels(dist_dataP$param_type)

# Make the ligand distance the first level; this controls the order in which
# the parameters appear in the plot.
dist_dataP$param_type <- relevel(dist_dataP$param_type, "Lig Dist(Å)")
table(dist_dataP$param_type)
levels(dist_dataP$param_type)

distanceP <- lf_bp2(dist_dataP,
                    # p_title = "",
                    violin_quantiles = 0.5,
                    monochrome = FALSE)
distanceP

# # check
# wilcox.test(wf_dist_genP$`PPI Dist(Å)`[wf_dist_genP$mutation_info_labels=="R"]
#             , wf_dist_genP$`PPI Dist(Å)`[wf_dist_genP$mutation_info_labels=="S"], paired = FALSE)
#
# wilcox.test(wf_dist_genP$`Lig Dist(Å)`[wf_dist_genP$mutation_info_labels=="R"]
#             , wf_dist_genP$`Lig Dist(Å)`[wf_dist_genP$mutation_info_labels=="S"], paired = FALSE)
#
# tapply(wf_dist_genP$`PPI Dist(Å)`, wf_dist_genP$mutation_info_labels, summary)
#
# tapply(wf_dist_genP$`Lig Dist(Å)`, wf_dist_genP$mutation_info_labels, summary)

#-------------------
# Distance data plot: LigDist
#--------------------
# First level after the relevel() above, i.e. "Lig Dist(Å)".
lig_level <- levels(dist_dataP$param_type)[[1]]
lig_level

dist_data_lig <- dist_dataP[dist_dataP$param_type %in% lig_level, ]
dist_data_lig$param_type <- factor(dist_data_lig$param_type)
table(dist_data_lig$param_type)
levels(dist_data_lig$param_type)

distanceP_lig <- lf_bp2(dist_data_lig,
                        # p_title = "",
                        violin_quantiles = 0.5,
                        monochrome = FALSE)
distanceP_lig

# NOTE(review): geneL_ppi2 and geneL_na are not defined in this file; they must
# already exist in the session. Both branches below pick the SECOND level of
# dist_dataP$param_type, so a gene present in both lists would get the same
# subset twice - verify the level order before using this for such a gene.
if (tolower(gene) %in% geneL_ppi2) {
  #-------------------
  # Distance data plot: PPI2 (second level of param_type)
  #--------------------
  levels(dist_dataP$param_type)[[2]]
  ppi2_level <- levels(dist_dataP$param_type)[[2]]
  dist_data_ppi2 <- dist_dataP[dist_dataP$param_type %in% ppi2_level, ]
  dist_data_ppi2$param_type <- factor(dist_data_ppi2$param_type)
  table(dist_data_ppi2$param_type)
  levels(dist_data_ppi2$param_type)

  distanceP_ppi2 <- lf_bp2(dist_data_ppi2,
                           # p_title = "",
                           violin_quantiles = 0.5,
                           dot_transparency = 0.2,
                           monochrome = FALSE)
  distanceP_ppi2
}

if (tolower(gene) %in% geneL_na) {
  #-------------------
  # Distance data plot: NADist (second level of param_type)
  #--------------------
  levels(dist_dataP$param_type)[[2]]
  na_level <- levels(dist_dataP$param_type)[[2]]
  dist_data_na <- dist_dataP[dist_dataP$param_type %in% na_level, ]
  dist_data_na$param_type <- factor(dist_data_na$param_type)
  table(dist_data_na$param_type)
  levels(dist_data_na$param_type)

  distanceP_na <- lf_bp2(dist_data_na,
                         # p_title = "",
                         violin_quantiles = 0.5,
                         monochrome = FALSE)
  distanceP_na
}
# Each section below follows the same steps: take one long-format frame out of
# all_dm_om_df, tabulate param_type, re-create the factor so only the levels
# present remain, tabulate again, and build the plot with lf_bp2().

#==============
# Plot:DUET
#==============
lf_duetP <- all_dm_om_df[["lf_duet"]]
table(lf_duetP$param_type)
lf_duetP$param_type <- factor(lf_duetP$param_type)
table(lf_duetP$param_type)

duetP <- lf_bp2(lf_duetP,
                # p_title = "",
                violin_quantiles = 0.5,
                monochrome = FALSE,
                dot_transparency = 0.2)

#==============
# Plot:FoldX
#==============
lf_foldxP <- all_dm_om_df[["lf_foldx"]]
table(lf_foldxP$param_type)
lf_foldxP$param_type <- factor(lf_foldxP$param_type)
table(lf_foldxP$param_type)

foldxP <- lf_bp2(lf_foldxP,
                 # p_title = "",
                 violin_quantiles = 0.5,
                 monochrome = FALSE,
                 dot_transparency = 0.1)

#==============
# Plot:DeepDDG
#==============
lf_deepddgP <- all_dm_om_df[["lf_deepddg"]]
table(lf_deepddgP$param_type)
lf_deepddgP$param_type <- factor(lf_deepddgP$param_type)
table(lf_deepddgP$param_type)

deepddgP <- lf_bp2(lf_deepddgP,
                   # p_title = "",
                   violin_quantiles = 0.5,
                   monochrome = FALSE,
                   dot_transparency = 0.2)
deepddgP

#==============
# Plot: Dynamut2
#==============
lf_dynamut2P <- all_dm_om_df[["lf_dynamut2"]]
table(lf_dynamut2P$param_type)
lf_dynamut2P$param_type <- factor(lf_dynamut2P$param_type)
table(lf_dynamut2P$param_type)

dynamut2P <- lf_bp2(lf_dynamut2P,
                    # p_title = "",
                    violin_quantiles = 0.5,
                    monochrome = FALSE,
                    dot_transparency = 0.2)

#==============
# Plot:ConSurf
#==============
lf_consurfP <- all_dm_om_df[["lf_consurf"]]
table(lf_consurfP$param_type)
lf_consurfP$param_type <- factor(lf_consurfP$param_type)
table(lf_consurfP$param_type)

consurfP <- lf_bp2(lf_consurfP,
                   # p_title = "",
                   violin_quantiles = 0.5,
                   monochrome = FALSE)

#==============
# Plot:PROVEAN
#==============
lf_proveanP <- all_dm_om_df[["lf_provean"]]
table(lf_proveanP$param_type)
lf_proveanP$param_type <- factor(lf_proveanP$param_type)
table(lf_proveanP$param_type)

proveanP <- lf_bp2(lf_proveanP,
                   # p_title = "",
                   violin_quantiles = 0.5,
                   monochrome = FALSE)

#==============
# Plot:SNAP2
#==============
lf_snap2P <- all_dm_om_df[["lf_snap2"]]
table(lf_snap2P$param_type)
lf_snap2P$param_type <- factor(lf_snap2P$param_type)
table(lf_snap2P$param_type)

snap2P <- lf_bp2(lf_snap2P,
                 # p_title = "",
                 violin_quantiles = 0.5,
                 monochrome = FALSE)
############################################################################
# Affinity plots: same steps as above (extract frame, tabulate, re-create the
# param_type factor, tabulate, build plot with lf_bp2()).

#================
# Plot: mCSM-lig
#================
lf_mcsm_ligP <- all_dm_om_df[["lf_mcsm_lig"]]
table(lf_mcsm_ligP$param_type)
lf_mcsm_ligP$param_type <- factor(lf_mcsm_ligP$param_type)
table(lf_mcsm_ligP$param_type)

mcsmligP <- lf_bp2(lf_mcsm_ligP,
                   # p_title = "",
                   violin_quantiles = 0.5,
                   monochrome = FALSE,
                   dot_transparency = 0.8)
mcsmligP

#=================
# Plot: mmCSM-lig2
#=================
lf_mmcsm_lig2P <- all_dm_om_df[["lf_mmcsm_lig2"]]
table(lf_mmcsm_lig2P$param_type)
lf_mmcsm_lig2P$param_type <- factor(lf_mmcsm_lig2P$param_type)
table(lf_mmcsm_lig2P$param_type)

# The plot object is named mcsmlig2P (single "m"), which is the name the
# combined-figure script refers to.
mcsmlig2P <- lf_bp2(lf_mmcsm_lig2P,
                    # p_title = "",
                    violin_quantiles = 0.5,
                    monochrome = FALSE,
                    dot_transparency = 0.8)
mcsmlig2P

#================
# Plot: mCSM-ppi2
#================
# Only for genes listed in geneL_ppi2 (not defined in this file).
if (tolower(gene) %in% geneL_ppi2) {
  lf_mcsm_ppi2P <- all_dm_om_df[["lf_mcsm_ppi2"]]
  table(lf_mcsm_ppi2P$param_type)
  lf_mcsm_ppi2P$param_type <- factor(lf_mcsm_ppi2P$param_type)
  table(lf_mcsm_ppi2P$param_type)

  mcsmppi2P <- lf_bp2(lf_mcsm_ppi2P,
                      # p_title = "",
                      violin_quantiles = 0.5,
                      monochrome = FALSE,
                      dot_transparency = 0.3)
}

#==============
# Plot: mCSM-NA
#==============
# Only for genes listed in geneL_na (not defined in this file).
if (tolower(gene) %in% geneL_na) {
  lf_mcsm_naP <- all_dm_om_df[["lf_mcsm_na"]]
  table(lf_mcsm_naP$param_type)
  lf_mcsm_naP$param_type <- factor(lf_mcsm_naP$param_type)
  table(lf_mcsm_naP$param_type)

  mcsmnaP <- lf_bp2(lf_mcsm_naP,
                    # p_title = "",
                    violin_quantiles = 0.5,
                    monochrome = FALSE,
                    dot_transparency = 0.4)
}
######################################
# Outplot with stats
######################################
# outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
#
# dm_om_combinedP = paste0(outdir_images
# ,tolower(gene)
# ,"_dm_om_all.svg" )
#
# cat("DM OM plots with stats:", dm_om_combinedP)
# svg(dm_om_combinedP, width = 32, height = 18)
# cowplot::plot_grid(
# cowplot::plot_grid(duetP, foldxP, deepddgP, dynamut2P, genomicsP, distanceP
# , nrow=1
# , rel_widths = c(1/7, 1/7,1/7,1/7, 1/7, 1.75/7)),
# #, rel_widths = c(1/8, 1/8,1/8,1/8, 1/8, 2.75/8)), # for 3 distances
# cowplot::plot_grid(consurfP, proveanP, snap2P
# , mcsmligP
# , mcsmlig2P
# , mcsmppi2P
# #, mcsmnaP
# , nrow=1),
# nrow=2)
#
# dev.off()

View file

@ -0,0 +1,176 @@
# source dm_om_plots.R
# "~" rather than a hard-coded /home/<user> prefix, in line with the other
# "~/git/..." paths used by these scripts.
# NOTE(review): `gene` and `outdir_images`, used further down, are not defined
# in this file; presumably they come from an earlier config/plot script that
# has to be run first - confirm.
source("~/git/LSHTM_analysis/scripts/plotting/plotting_thesis/pnca/pnca_dm_om_plots.R")

##### plots to combine ####
# Bare names: each plot is displayed when these lines are run interactively.
duetP
foldxP
deepddgP
dynamut2P
genomicsP
consurfP
proveanP
snap2P
mcsmligP
mcsmlig2P
#mcsmppi2P

# Plot labels
tit1 <- "Stability changes"
tit2 <- "Genomic measure"
tit3 <- "Distance to partners"
tit4 <- "Evolutionary Conservation"
tit5 <- "Affinity changes"

# Font size used for the section titles.
pt_size <- 30
# theme_gray() with a bold plot title. Despite the name, the base font family
# requested is "sans". Any further arguments are forwarded to theme_gray().
theme_georgia <- function(...) {
  base_theme <- theme_gray(base_family = "sans", ...)
  bold_title <- theme(plot.title = element_text(face = "bold"))
  base_theme + bold_title
}
# Resolved plot.title element of theme_georgia(); its family and face are
# reused for the section titles (the size is replaced by pt_size, not
# title_theme$size).
title_theme <- calc_element("plot.title", theme_georgia())

# One stand-alone title strip per section label, all built the same way.
pt_list <- lapply(
  list(tit1, tit2, tit3, tit4, tit5),
  function(title_text) {
    ggdraw() +
      draw_label(
        title_text,
        fontfamily = title_theme$family,
        fontface = title_theme$face,
        size = pt_size
      )
  }
)

pt1 <- pt_list[[1]]
pt2 <- pt_list[[2]]
pt3 <- pt_list[[3]]
pt4 <- pt_list[[4]]
pt5 <- pt_list[[5]]
#======================
# Output plot function
#======================
# Assembles the combined figure: a top row (stability | genomics | ligand
# distance) above a bottom row (conservation | affinity), each group with its
# title strip on top.
#
# Args:
#   x: not used; kept so existing calls stay valid.
#
# Reads the plot objects (duetP, foldxP, ..., mcsmlig2P) and the title strips
# (pt1..pt5) from the calling environment.
#
# Returns: the cowplot grid; it is only drawn when printed.
#
# NOTE(review): panel labels run A-F in the top row and H-L in the bottom row;
# there is no "G" (the second distance plot is commented out) - confirm this
# is intended.
OutPlot_dm_om <- function(x) {

  # dist b/w plot title and plot
  relH_tp <- c(0.08, 0.92)
  my_label_size <- 25

  # One horizontal row of labelled plots.
  labelled_row <- function(..., labels) {
    cowplot::plot_grid(...,
                       nrow = 1,
                       labels = labels,
                       label_size = my_label_size)
  }

  # Title strip stacked on top of a row of plots.
  titled_group <- function(title_strip, plot_row) {
    cowplot::plot_grid(title_strip,
                       plot_row,
                       ncol = 1,
                       rel_heights = relH_tp)
  }

  #----------------
  # Top panel
  #----------------
  # The NULL entries are zero-width spacers between the groups.
  top_panel <- cowplot::plot_grid(
    titled_group(pt1,
                 labelled_row(duetP, foldxP, deepddgP, dynamut2P,
                              labels = c("A", "B", "C", "D"))),
    NULL,
    titled_group(pt2,
                 labelled_row(genomicsP,
                              labels = c("E"))),
    NULL,
    titled_group(pt3,
                 labelled_row( #distanceP
                   distanceP_lig,
                   #, distanceP_ppi2
                   labels = c("F"))),
    nrow = 1,
    rel_widths = c(2/6, 0, 0.5/6, 0, 0.5/6)
  )

  #----------------
  # Bottom panel
  #----------------
  bottom_panel <- cowplot::plot_grid(
    titled_group(pt4,
                 labelled_row(consurfP, proveanP, snap2P,
                              labels = c("H", "I", "J"))),
    NULL,
    titled_group(pt5,
                 labelled_row(mcsmligP,
                              mcsmlig2P,
                              #, mcsmppi2P
                              labels = c("K", "L"))),
    NULL,
    nrow = 1,
    rel_widths = c(3/6, 0.1/6, 3/6, 0.1/6)
  )

  #-------------------------------
  # combine: Top and Bottom panel
  #-------------------------------
  # Two stacked rows: rel_heights is the argument that applies here
  # (rel_widths only affects columns, and this grid has a single column).
  cowplot::plot_grid(top_panel, bottom_panel,
                     nrow = 2,
                     rel_heights = c(1, 1),
                     align = "hv")
}
#=====================
# OutPlot: svg and png
#======================
# OutPlot_dm_om() only returns the figure. print() draws it onto the open
# device explicitly, so the files are also filled when this script is run
# through source(), which does not auto-print top-level values.
dm_om_combinedP <- paste0(outdir_images,
                          tolower(gene),
                          "_dm_om_all.svg")
cat("DM OM plots with stats:", dm_om_combinedP, "\n")

svg(dm_om_combinedP, width = 32, height = 18)
print(OutPlot_dm_om())
dev.off()

dm_om_combinedP_png <- paste0(outdir_images,
                              tolower(gene),
                              "_dm_om_all.png")
cat("DM OM plots with stats:", dm_om_combinedP_png, "\n")

png(dm_om_combinedP_png, width = 32, height = 18, units = "in", res = 300)
print(OutPlot_dm_om())
dev.off()