added dir for embb for consistency and checks and moved others to version1

This commit is contained in:
Tanushree Tunstall 2022-08-25 10:19:25 +01:00
parent 19b820e316
commit ac72634b48
19 changed files with 1614 additions and 2 deletions

View file

@ -0,0 +1,391 @@
#!/usr/bin/env Rscript
#########################################################
# TASK: Barplots for mCSM DUET, ligand affinity, and foldX
# basic barplots with count of mutations
# basic barplots with frequency of count of mutations
# , df_colname = ""
# , leg_title = ""
# , ats = 25 # axis text size
# , als = 22 # axis label size
# , lts = 20 # legend text size
# , ltis = 22 # label title size
# , geom_ls = 10 # geom_label size
# , yaxis_title = "Number of nsSNPs"
# , bp_plot_title = ""
# , label_categories = c("Destabilising", "Stabilising")
# , title_colour = "chocolate4"
# , subtitle_text = NULL
# , sts = 20
# , subtitle_colour = "pink"
# #, leg_position = c(0.73,0.8) # within plot area
# , leg_position = "top"
# , bar_fill_values = c("#F8766D", "#00BFC4")
#########################################################
#=============
# Data: Input
#==============
#source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/config/gid.R")
#source("~/git/LSHTM_analysis/config/alr.R")
#source("~/git/LSHTM_analysis/config/katg.R")
source("~/git/LSHTM_analysis/config/rpob.R")
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") sourced by above
# sanity check
cat("\nSourced plotting cols as well:", length(plotting_cols))
####################################################
# Print the class before and after coercing merged_df3 to a plain data.frame
class(merged_df3)
merged_df3 = as.data.frame(merged_df3)
class(merged_df3)
head(merged_df3$pos_count)
# Rename the column called "pos_count" to "df2_pos_count_all"
# (the matching column index is echoed as a check)
nc_pc_CHANGE = which(colnames(merged_df3)== "pos_count"); nc_pc_CHANGE
colnames(merged_df3)[nc_pc_CHANGE] = "df2_pos_count_all"
# NOTE(review): `$` partially matches data.frame column names, so this can still
# print a column whose name merely starts with "pos_count" -- confirm what is shown
head(merged_df3$pos_count)
head(merged_df3$df2_pos_count_all)
# DROP pos_count column
# merged_df3$pos_count <-NULL
# NOTE(review): after the rename above no column should be named exactly
# "pos_count", so this filter looks like a no-op unless the name was duplicated -- confirm
merged_df3 = merged_df3[, !colnames(merged_df3)%in%c("pos_count")]
head(merged_df3$pos_count)
# Keep only the columns whose names are listed in plotting_cols
df3 = merged_df3[, colnames(merged_df3)%in%plotting_cols]
# Echo whether "nca_distance" is among the retained columns
"nca_distance"%in%colnames(df3)
#=======
# output
#=======
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
cat("plots will output to:", outdir_images)
###########################################################
#------------------------------
# plot default sizes
#------------------------------
#=========================
# Affinity outcome
# check this var: outcome_cols_affinity
# get from preformatting or put in globals
#==========================
# Echo the cut-off and the distance column names; evaluating a name that has not
# been defined by the sourced scripts fails here, before any plotting starts.
DistCutOff
LigDist_colname # = "ligand_distance" # from globals
ppi2Dist_colname
naDist_colname
###########################################################
# get plotting data within the distance
# which() keeps only rows where the comparison is TRUE. Plain logical indexing
# would return one all-NA row for every mutation whose distance is NA, and those
# rows would then be passed on to the plotting functions below.
df3_lig = df3[which(df3[[LigDist_colname]] < DistCutOff), ]
df3_ppi2 = df3[which(df3[[ppi2Dist_colname]] < DistCutOff), ]
df3_na = df3[which(df3[[naDist_colname]] < DistCutOff), ]
common_bp_title = paste0("Sites <", DistCutOff, angstroms_symbol)
#------------------------------
# barplot for ligand affinity:
# <10 Ang of ligand
#------------------------------
mLigP <- stability_count_bp(
  plotdf = df3_lig,
  df_colname = "ligand_outcome",
  yaxis_title = "Number of nsSNPs",
  leg_position = "none",
  subtitle_text = "mCSM-lig",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  subtitle_colour = "black",
  sts = 10, lts = 8, ats = 12, als = 11, ltis = 11,
  geom_ls = 2.5
)
mLigP

#------------------------------
# mmCSM-lig outcome barplot
# same ligand-distance subset (df3_lig) as above, different outcome column
#------------------------------
mmLigP <- stability_count_bp(
  plotdf = df3_lig,
  df_colname = "mmcsm_lig_outcome",
  yaxis_title = "",
  leg_position = "none",
  subtitle_text = "mmCSM-lig",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  subtitle_colour = "black",
  sts = 10, lts = 8, ats = 12, als = 11, ltis = 11,
  geom_ls = 2.5
)
mmLigP

#------------------------------
# mCSM-ppi2 outcome barplot
# uses the interface-distance subset (df3_ppi2);
# only built when the gene is listed in geneL_ppi2
#------------------------------
if (tolower(gene) %in% geneL_ppi2) {
  ppi2P <- stability_count_bp(
    plotdf = df3_ppi2,
    df_colname = "mcsm_ppi2_outcome",
    yaxis_title = "Number of nsSNPs",
    leg_position = "none",
    subtitle_text = "mCSM-ppi2",
    bar_fill_values = c("#F8766D", "#00BFC4"),
    subtitle_colour = "black",
    sts = 10, lts = 8, ats = 12, als = 11, ltis = 11,
    geom_ls = 2.5
  )
  ppi2P
}

#------------------------------
# mCSM-NA outcome barplot
# uses the nucleic-acid-distance subset (df3_na);
# only built when the gene is listed in geneL_na
#------------------------------
if (tolower(gene) %in% geneL_na) {
  nca_distP <- stability_count_bp(
    plotdf = df3_na,
    df_colname = "mcsm_na_outcome",
    yaxis_title = "Number of nsSNPs",
    leg_position = "none",
    subtitle_text = "mCSM-NA",
    bar_fill_values = c("#F8766D", "#00BFC4"),
    subtitle_colour = "black",
    sts = 10, lts = 8, ats = 12, als = 11, ltis = 11,
    geom_ls = 2.5
  )
  nca_distP
}
#####################################################################
# ------------------------------
# bp site site count: mCSM-lig
# < 10 Ang ligand
# ------------------------------
common_bp_title <- paste0("Sites <", DistCutOff, angstroms_symbol)

# Site-count barplot for the ligand-distance subset
posC_lig <- site_snp_count_bp(
  plotdf = df3_lig,
  df_colname = "position",
  xaxis_title = "Number of nsSNPs",
  yaxis_title = "Number of Sites",
  subtitle_colour = "chocolate4",
  subtitle_text = "",
  subtitle_size = 8,
  geom_ls = 2.6,
  leg_text_size = 10, axis_text_size = 10, axis_label_size = 10
)
posC_lig

#------------------------------
# Site-count barplot for the interface-distance subset (ppi2 genes only)
#------------------------------
if (tolower(gene) %in% geneL_ppi2) {
  posC_ppi2 <- site_snp_count_bp(
    plotdf = df3_ppi2,
    df_colname = "position",
    xaxis_title = "Number of nsSNPs",
    yaxis_title = "Number of Sites",
    subtitle_colour = "chocolate4",
    subtitle_text = "",
    subtitle_size = 8,
    geom_ls = 2.6,
    leg_text_size = 10, axis_text_size = 10, axis_label_size = 10
  )
  posC_ppi2
}

#------------------------------
# Site-count barplot for the nucleic-acid-distance subset (NA genes only)
#------------------------------
if (tolower(gene) %in% geneL_na) {
  posC_nca <- site_snp_count_bp(
    plotdf = df3_na,
    df_colname = "position",
    xaxis_title = "Number of nsSNPs",
    yaxis_title = "Number of Sites",
    subtitle_colour = "chocolate4",
    subtitle_text = "",
    subtitle_size = 8,
    geom_ls = 2.6,
    leg_text_size = 10, axis_text_size = 10, axis_label_size = 10
  )
  posC_nca
}

#===============================================================
#------------------------------
# Site-count barplot for ALL mutations (df3, no distance filter)
#------------------------------
posC_all <- site_snp_count_bp(
  plotdf = df3,
  df_colname = "position",
  xaxis_title = "Number of nsSNPs",
  yaxis_title = "Number of Sites",
  subtitle_colour = "chocolate4",
  subtitle_text = "All mutations sites",
  subtitle_size = 8,
  geom_ls = 2.6,
  leg_text_size = 10, axis_text_size = 10, axis_label_size = 10
)
posC_all
##################################################################
# ConSurf outcome barplot: the only one drawn with its legend ("top") and
# with the consurf_colours palette
consurfP <- stability_count_bp(
  plotdf = df3,
  df_colname = "consurf_outcome",
  yaxis_title = "Number of nsSNPs",
  leg_position = "top",
  subtitle_text = "ConSurf",
  bar_fill_values = consurf_colours, # from globals
  subtitle_colour = "black",
  sts = 10, lts = 8, ats = 8, als = 8, ltis = 11,
  geom_ls = 2
)
consurfP

##############################################################
#===================
# Stability outcome barplots (all mutations in df3)
#===================
# mCSM-DUET
duetP <- stability_count_bp(
  plotdf = df3,
  df_colname = "duet_outcome",
  leg_title = "mCSM-DUET",
  yaxis_title = "Number of nsSNPs",
  leg_position = "none",
  subtitle_text = "mCSM-DUET",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  subtitle_colour = "black",
  sts = 10, lts = 8, ats = 12, als = 11, ltis = 11,
  geom_ls = 2.5
)
duetP

# FoldX
foldxP <- stability_count_bp(
  plotdf = df3,
  df_colname = "foldx_outcome",
  yaxis_title = "",
  leg_position = "none",
  subtitle_text = "FoldX",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  sts = 10, lts = 8, ats = 12, als = 11, ltis = 11,
  geom_ls = 2.5
)
foldxP

# DeepDDG
deepddgP <- stability_count_bp(
  plotdf = df3,
  df_colname = "deepddg_outcome",
  yaxis_title = "",
  leg_position = "none",
  subtitle_text = "DeepDDG",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  sts = 10, lts = 8, ats = 12, als = 11, ltis = 11,
  geom_ls = 2.5
)
deepddgP

# Dynamut2
dynamut2P <- stability_count_bp(
  plotdf = df3,
  df_colname = "ddg_dynamut2_outcome",
  yaxis_title = "",
  leg_position = "none",
  subtitle_text = "Dynamut2",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  sts = 10, lts = 8, ats = 12, als = 11, ltis = 11,
  geom_ls = 2.5
)
dynamut2P

# PROVEAN (pink palette)
proveanP <- stability_count_bp(
  plotdf = df3,
  df_colname = "provean_outcome",
  yaxis_title = "Number of nsSNPs",
  leg_position = "none",
  subtitle_text = "PROVEAN",
  bar_fill_values = c("#D01C8B", "#F1B6DA"),
  sts = 10, lts = 8, ats = 12, als = 11, ltis = 11,
  geom_ls = 2.5
)
proveanP

# SNAP2 (pink palette)
snap2P <- stability_count_bp(
  plotdf = df3,
  df_colname = "snap2_outcome",
  yaxis_title = "",
  leg_position = "none",
  subtitle_text = "SNAP2",
  bar_fill_values = c("#D01C8B", "#F1B6DA"),
  sts = 10, lts = 8, ats = 12, als = 11, ltis = 11,
  geom_ls = 2.5
)
snap2P
#####################################################################################

View file

@ -0,0 +1,357 @@
#!/usr/bin/env Rscript
#########################################################
# TASK: Barplots
# basic barplots with outcome
# basic barplots with frequency of count of mutations
#########################################################
#=============
# Data: Input
#==============
#source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/config/gid.R")
#source("~/git/LSHTM_analysis/config/alr.R")
#source("~/git/LSHTM_analysis/config/katg.R")
source("~/git/LSHTM_analysis/config/rpob.R")
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") sourced by above
cat("\nSourced plotting cols as well:", length(plotting_cols))
####################################################
# Echo the class of merged_df3 as a sanity check
class(merged_df3)
# Plotting data: every column of merged_df3 except pos_count
df3 = subset(merged_df3, select = -c(pos_count))
#=======
# output
#=======
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
cat("plots will output to:", outdir_images)
###########################################################
#------------------------------
# plot default sizes
#------------------------------
#=========================
# Affinity outcome
# check this var: outcome_cols_affinity
# get from preformatting or put in globals
#==========================
# Echo the cut-off and the distance column names; evaluating a name that has not
# been defined by the sourced scripts fails here, before any plotting starts.
DistCutOff
LigDist_colname # = "ligand_distance" # from globals
ppi2Dist_colname
naDist_colname
###########################################################
# get plotting data within the distance
# which() keeps only rows where the comparison is TRUE. Plain logical indexing
# would return one all-NA row for every mutation whose distance is NA, and those
# rows would then be passed on to the plotting functions below.
df3_lig = df3[which(df3[[LigDist_colname]] < DistCutOff), ]
df3_ppi2 = df3[which(df3[[ppi2Dist_colname]] < DistCutOff), ]
df3_na = df3[which(df3[[naDist_colname]] < DistCutOff), ]
common_bp_title = paste0("Sites <", DistCutOff, angstroms_symbol)
#------------------------------
# barplot for ligand affinity:
# <10 Ang of ligand
#------------------------------
mLigP <- stability_count_bp(
  plotdf = df3_lig,
  df_colname = "ligand_outcome",
  yaxis_title = "Number of nsSNPs",
  leg_position = "none",
  subtitle_text = "mCSM-lig",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  subtitle_colour = "black",
  sts = 10, lts = 8, ats = 12, als = 11, ltis = 11,
  geom_ls = 2.5
)
mLigP

#------------------------------
# mmCSM-lig outcome barplot
# same ligand-distance subset (df3_lig) as above, different outcome column
#------------------------------
mmLigP <- stability_count_bp(
  plotdf = df3_lig,
  df_colname = "mmcsm_lig_outcome",
  yaxis_title = "",
  leg_position = "none",
  subtitle_text = "mmCSM-lig",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  subtitle_colour = "black",
  sts = 10, lts = 8, ats = 12, als = 11, ltis = 11,
  geom_ls = 2.5
)
mmLigP

#------------------------------
# mCSM-ppi2 outcome barplot
# uses the interface-distance subset (df3_ppi2);
# only built when the gene is listed in geneL_ppi2
#------------------------------
if (tolower(gene) %in% geneL_ppi2) {
  ppi2P <- stability_count_bp(
    plotdf = df3_ppi2,
    df_colname = "mcsm_ppi2_outcome",
    yaxis_title = "Number of nsSNPs",
    leg_position = "none",
    subtitle_text = "mCSM-ppi2",
    bar_fill_values = c("#F8766D", "#00BFC4"),
    subtitle_colour = "black",
    sts = 10, lts = 8, ats = 12, als = 11, ltis = 11,
    geom_ls = 2.5
  )
  ppi2P
}

#------------------------------
# mCSM-NA outcome barplot
# uses the nucleic-acid-distance subset (df3_na);
# only built when the gene is listed in geneL_na
#------------------------------
if (tolower(gene) %in% geneL_na) {
  nca_distP <- stability_count_bp(
    plotdf = df3_na,
    df_colname = "mcsm_na_outcome",
    yaxis_title = "Number of nsSNPs",
    leg_position = "none",
    subtitle_text = "mCSM-NA",
    bar_fill_values = c("#F8766D", "#00BFC4"),
    subtitle_colour = "black",
    sts = 10, lts = 8, ats = 12, als = 11, ltis = 11,
    geom_ls = 2.5
  )
  nca_distP
}
#####################################################################
# ------------------------------
# bp site site count: mCSM-lig
# < 10 Ang ligand
# ------------------------------
common_bp_title <- paste0("Sites <", DistCutOff, angstroms_symbol)

# Site-count barplot for the ligand-distance subset
posC_lig <- site_snp_count_bp(
  plotdf = df3_lig,
  df_colname = "position",
  xaxis_title = "Number of nsSNPs",
  yaxis_title = "Number of Sites",
  subtitle_colour = "chocolate4",
  subtitle_text = "",
  subtitle_size = 8,
  geom_ls = 2.6,
  leg_text_size = 10, axis_text_size = 10, axis_label_size = 10
)
posC_lig

#------------------------------
# Site-count barplot for the interface-distance subset (ppi2 genes only)
#------------------------------
if (tolower(gene) %in% geneL_ppi2) {
  posC_ppi2 <- site_snp_count_bp(
    plotdf = df3_ppi2,
    df_colname = "position",
    xaxis_title = "Number of nsSNPs",
    yaxis_title = "Number of Sites",
    subtitle_colour = "chocolate4",
    subtitle_text = "",
    subtitle_size = 8,
    geom_ls = 2.6,
    leg_text_size = 10, axis_text_size = 10, axis_label_size = 10
  )
  posC_ppi2
}

#------------------------------
# Site-count barplot for the nucleic-acid-distance subset (NA genes only)
#------------------------------
if (tolower(gene) %in% geneL_na) {
  posC_nca <- site_snp_count_bp(
    plotdf = df3_na,
    df_colname = "position",
    xaxis_title = "Number of nsSNPs",
    yaxis_title = "Number of Sites",
    subtitle_colour = "chocolate4",
    subtitle_text = "",
    subtitle_size = 8,
    geom_ls = 2.6,
    leg_text_size = 10, axis_text_size = 10, axis_label_size = 10
  )
  posC_nca
}

#===============================================================
#------------------------------
# Site-count barplot for ALL mutations (df3, no distance filter)
#------------------------------
posC_all <- site_snp_count_bp(
  plotdf = df3,
  df_colname = "position",
  xaxis_title = "Number of nsSNPs",
  yaxis_title = "Number of Sites",
  subtitle_colour = "chocolate4",
  subtitle_text = "All mutations sites",
  subtitle_size = 8,
  geom_ls = 2.6,
  leg_text_size = 10, axis_text_size = 10, axis_label_size = 10
)
posC_all
##################################################################
# ConSurf outcome barplot: the only one drawn with its legend ("top") and
# with the consurf_colours palette
consurfP <- stability_count_bp(
  plotdf = df3,
  df_colname = "consurf_outcome",
  yaxis_title = "Number of nsSNPs",
  leg_position = "top",
  subtitle_text = "ConSurf",
  bar_fill_values = consurf_colours, # from globals
  subtitle_colour = "black",
  sts = 10, lts = 8, ats = 8, als = 8, ltis = 11,
  geom_ls = 2
)
consurfP

##############################################################
#===================
# Stability outcome barplots (all mutations in df3)
#===================
# mCSM-DUET
duetP <- stability_count_bp(
  plotdf = df3,
  df_colname = "duet_outcome",
  leg_title = "mCSM-DUET",
  yaxis_title = "Number of nsSNPs",
  leg_position = "none",
  subtitle_text = "mCSM-DUET",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  subtitle_colour = "black",
  sts = 10, lts = 8, ats = 12, als = 11, ltis = 11,
  geom_ls = 2.5
)
duetP

# FoldX
foldxP <- stability_count_bp(
  plotdf = df3,
  df_colname = "foldx_outcome",
  yaxis_title = "",
  leg_position = "none",
  subtitle_text = "FoldX",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  sts = 10, lts = 8, ats = 12, als = 11, ltis = 11,
  geom_ls = 2.5
)
foldxP

# DeepDDG
deepddgP <- stability_count_bp(
  plotdf = df3,
  df_colname = "deepddg_outcome",
  yaxis_title = "",
  leg_position = "none",
  subtitle_text = "DeepDDG",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  sts = 10, lts = 8, ats = 12, als = 11, ltis = 11,
  geom_ls = 2.5
)
deepddgP

# Dynamut2
dynamut2P <- stability_count_bp(
  plotdf = df3,
  df_colname = "ddg_dynamut2_outcome",
  yaxis_title = "",
  leg_position = "none",
  subtitle_text = "Dynamut2",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  sts = 10, lts = 8, ats = 12, als = 11, ltis = 11,
  geom_ls = 2.5
)
dynamut2P

# PROVEAN (pink palette)
proveanP <- stability_count_bp(
  plotdf = df3,
  df_colname = "provean_outcome",
  yaxis_title = "Number of nsSNPs",
  leg_position = "none",
  subtitle_text = "PROVEAN",
  bar_fill_values = c("#D01C8B", "#F1B6DA"),
  sts = 10, lts = 8, ats = 12, als = 11, ltis = 11,
  geom_ls = 2.5
)
proveanP

# SNAP2 (pink palette)
snap2P <- stability_count_bp(
  plotdf = df3,
  df_colname = "snap2_outcome",
  yaxis_title = "",
  leg_position = "none",
  subtitle_text = "SNAP2",
  bar_fill_values = c("#D01C8B", "#F1B6DA"),
  sts = 10, lts = 8, ats = 12, als = 11, ltis = 11,
  geom_ls = 2.5
)
snap2P
#####################################################################################

View file

@ -0,0 +1,270 @@
# Evaluate each plot object used by the combined figures below; a name that
# has not been defined yet raises an "object not found" error at this point.
# NOTE(review): these objects are not created in this script -- presumably the
# barplot scripts must be sourced first; confirm the required run order
duetP
foldxP
deepddgP
dynamut2P
proveanP
snap2P
mLigP
mmLigP
posC_lig
ppi2P
posC_ppi2
sensP
peP
#========================
# Common title settings
#=========================
# theme_gray() with the "sans" base family and a bold plot title; any extra
# arguments are forwarded to theme_gray()
theme_georgia <- function(...) {
  bold_title <- theme(plot.title = element_text(face = "bold"))
  theme_gray(base_family = "sans", ...) + bold_title
}
# Resolved plot.title element of that theme; its family/face are reused for labels
title_theme <- calc_element("plot.title", theme_georgia())
###############################################################
common_bp_title = paste0("Sites <", DistCutOff, angstroms_symbol)
# extract common legends
# Outcome legend shared by the stability and affinity panels, taken from duetP.
# (A legend was previously also extracted from mLigP into the same variable and
# immediately overwritten; that dead assignment has been dropped.)
common_legend_outcome = get_legend(duetP +
                                     guides(color = guide_legend(nrow = 1)) +
                                     theme(legend.position = "top"))
# Conservation outcome legend, taken from the SNAP2 barplot
cons_common_legend_outcome = get_legend(snap2P +
                                          guides(color = guide_legend(nrow = 1)) +
                                          theme(legend.position = "top"))
###################################################################
#==================================
# Stability+Consevation: COMBINE
#==================================
# Font size shared by the two panel titles
tt_size <- 10

# Title grob for the stability panel
tt_stab <- ggdraw() +
  draw_label(
    "Stability outcome",
    fontfamily = title_theme$family,
    fontface = title_theme$face,
    size = tt_size
  )

# Title grob for the conservation panel
tt_cons <- ggdraw() +
  draw_label(
    "Conservation outcome",
    fontfamily = title_theme$family,
    fontface = title_theme$face,
    size = tt_size
  )
#----------------------
# Output plot
#-----------------------
# Output file for the combined stability + conservation figure
stab_cons_CLP = paste0(outdir_images
                       , tolower(gene)
                       , "_stab_cons_BP_CLP.png")
print(paste0("plot filename:", stab_cons_CLP))
png(stab_cons_CLP, units = "in", width = 10, height = 5, res = 300 )
# print() is explicit so the figure is also drawn when this script is source()d:
# ggplot/cowplot objects are only auto-printed at the top level of a session.
print(
  cowplot::plot_grid(
    # left: stability title + legend above the four stability barplots (A-D)
    cowplot::plot_grid(
      cowplot::plot_grid(
        tt_stab,
        common_legend_outcome,
        nrow = 2
      ),
      cowplot::plot_grid(
        duetP,
        foldxP,
        deepddgP,
        dynamut2P,
        nrow = 1,
        labels = c("A", "B", "C", "D"),
        label_size = 12),
      nrow = 2,
      rel_heights = c(1, 10)
    ),
    # middle: empty spacer column
    NULL,
    # right: conservation title + legend above the two conservation barplots (E-F)
    cowplot::plot_grid(
      cowplot::plot_grid(
        tt_cons,
        cons_common_legend_outcome,
        nrow = 2
      ),
      cowplot::plot_grid(
        proveanP,
        snap2P,
        nrow = 1,
        labels = c("E", "F"),
        # label_size belongs on the grid that carries the labels; it used to sit
        # on the unlabelled wrapper, so E/F were drawn at the default size
        # instead of matching A-D
        label_size = 12,
        align = "hv"),
      nrow = 2,
      rel_heights = c(1, 10)
    ),
    rel_widths = c(2, 0.15, 1),
    nrow = 1
  )
)
dev.off()
#################################################################
#=======================================
# Affinity barplots: COMBINE ALL three
#========================================
# Panel 1: ligand affinity (title + legend over mCSM-lig, mmCSM-lig, site counts)
ligT <- paste0(common_bp_title, " ligand")
lig_affT <- ggdraw() +
  draw_label(
    ligT,
    fontfamily = title_theme$family,
    fontface = title_theme$face,
    size = 8
  )
p1 <- cowplot::plot_grid(
  cowplot::plot_grid(lig_affT, common_legend_outcome, nrow = 2),
  cowplot::plot_grid(
    mLigP, mmLigP, posC_lig,
    nrow = 1,
    rel_widths = c(1, 1, 1.8),
    align = "h"
  ),
  nrow = 2,
  rel_heights = c(1, 8)
)

###########################################################
# Panel 2: PP-interface affinity (title + legend over mCSM-ppi2, site counts)
ppi2T <- paste0(common_bp_title, " PP-interface")
ppi2_affT <- ggdraw() +
  draw_label(
    ppi2T,
    fontfamily = title_theme$family,
    fontface = title_theme$face,
    size = 8
  )
p2 <- cowplot::plot_grid(
  cowplot::plot_grid(ppi2_affT, common_legend_outcome, nrow = 2),
  cowplot::plot_grid(
    ppi2P, posC_ppi2,
    nrow = 1,
    rel_widths = c(1.2, 1.8),
    align = "h"
  ),
  nrow = 2,
  rel_heights = c(1, 8)
)

###########################################################
# Panel 3: PE plot stacked above the all-sites count plot
# NOTE(review): peP2 is used here, while the object echoed at the top of this
# script is peP -- confirm peP2 is defined before this point
peT_allT <- ggdraw() +
  draw_label(
    "All mutation sites",
    fontfamily = title_theme$family,
    fontface = title_theme$face,
    size = 8
  )
p3 <- cowplot::plot_grid(
  cowplot::plot_grid(
    peT_allT,
    nrow = 2,
    rel_widths = c(1, 3),
    axis = "lr"
  ),
  cowplot::plot_grid(
    peP2, posC_all,
    nrow = 2,
    rel_widths = c(1, 1),
    align = "v",
    axis = "lr",
    rel_heights = c(1, 8)
  ),
  rel_heights = c(1, 18),
  nrow = 2,
  axis = "lr"
)
p3
#===============
# Final combine
#===============
# Size (inches) of the combined mutation-impact figure
w = 11.79
h = 3.5
mut_impact_CLP = paste0(outdir_images
                        , tolower(gene)
                        , "_mut_impactCLP.png")
#svg(affP, width = 20, height = 5.5)
print(paste0("plot filename:", mut_impact_CLP))
png(mut_impact_CLP, units = "in", width = w, height = h, res = 300 )
# Panels p1, p2, p3 side by side, labelled A, B, C ("AUTO").
# print() is explicit so the figure is also drawn when this script is source()d:
# grid-based plots are only auto-printed at the top level of a session.
print(
  cowplot::plot_grid(p1, p2, p3
                     , nrow = 1
                     , labels = "AUTO"
                     , label_size = 12
                     , rel_widths = c(3,2,2)
                     #, rel_heights = c(1)
  )
)
dev.off()
##################################################
# Evaluate the two plot objects written out below (errors if either is missing)
sensP
consurfP
#=================
# Combine sensitivity + ConSurf
# or ConSurf
#=================
w = 3
h = 3
# sens_conP = paste0(outdir_images
# ,tolower(gene)
# ,"_sens_cons_CLP.png")
#
# print(paste0("plot filename:", sens_conP))
# png(sens_conP, units = "in", width = w, height = h, res = 300 )
#
# cowplot::plot_grid(sensP, consurfP,
# nrow = 2,
# rel_heights = c(1, 1.5)
# )
#
# dev.off()

# In every device block below the plot is print()ed explicitly: ggplot objects
# are only auto-printed at the top level, so without print() the PNG would be
# empty whenever this script is run through source().

# ConSurf barplot on its own
conCLP = paste0(outdir_images
                , tolower(gene)
                , "_consurf_BP.png")
print(paste0("plot filename:", conCLP))
png(conCLP, units = "in", width = w, height = h, res = 300 )
print(consurfP)
dev.off()
#================================
# Sensitivity mutation numbers: geom_tile
#================================
sensCLP = paste0(outdir_images
                 , tolower(gene)
                 , "_sensN_tile.png")
print(paste0("plot filename:", sensCLP))
png(sensCLP, units = "in", width = 1, height = 1, res = 300 )
print(sensP)
dev.off()
#================================
# Sensitivity SITE numbers: geom_tile
#================================
sens_siteCLP = paste0(outdir_images
                      , tolower(gene)
                      , "_sens_siteC_tile.png")
print(paste0("plot filename:", sens_siteCLP))
png(sens_siteCLP, units = "in", width = 1, height = 1, res = 300 )
print(sens_siteP)
dev.off()
###########################################################

View file

@ -0,0 +1,347 @@
#!/usr/bin/env Rscript
#source("~/git/LSHTM_analysis/config/alr.R")
source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/config/katg.R")
#source("~/git/LSHTM_analysis/config/gid.R")
#source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/rpob.R")
# get plotting dfs
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
####################################################
#=======
# output
#=======
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
#=======
# Input
#=======
# Build the data frame used by every correlation plot in this script
merged_df3 = as.data.frame(merged_df3)
corr_plotdf = corr_data_extract(merged_df3
                                , gene = gene
                                , drug = drug
                                , extract_scaled_cols = FALSE)
colnames(corr_plotdf)
# identical() requires the same names in the same order AND the same length;
# all(a == b) recycles the shorter vector and can pass on unequal lengths.
if (identical(colnames(corr_df_m3_f), colnames(corr_plotdf))){
  cat("PASS: corr plot colnames match for dashboard")
}else{
  stop("Abort: corr plot colnames DO NOT match for dashboard")
}
#corr_plotdf = corr_df_m3_f #for downstream code
# Names of every column containing "Dist"
aff_dist_cols = grep("Dist", colnames(corr_plotdf), value = TRUE)
aff_dist_cols
# Columns included in every correlation plot
static_cols = c("Log10(MAF)"
                , "Log10(OR)"
                #, "-Log10(P)"
)
#================
# stability
#================
# aff_dist_cols: the distance columns (names containing "Dist", e.g. Lig-Dist and PPI-Dist)
# Columns for the stability correlation plot; "dst_mode" must stay last among
# the plotted ones because everything before it is correlated and it supplies
# the point colour.
corr_ps_colnames = c(static_cols
                     , "DUET"
                     , "FoldX"
                     , "DeepDDG"
                     , "Dynamut2"
                     , aff_dist_cols
                     , "dst_mode")
if (all(corr_ps_colnames%in%colnames(corr_plotdf))){
  cat("PASS: all colnames exist for correlation")
}else{
  stop("Abort: all colnames DO NOT exist for correlation")
}
corr_df_ps = corr_plotdf[, corr_ps_colnames]
# Number of mutations with a non-missing odds ratio. The column is named
# "Log10(OR)"; the old lookup of `Log(OR)` matched no column, returned NULL and
# so always reported nrow() as the number of complete observations.
complete_obs_ps = sum(!is.na(corr_df_ps[["Log10(OR)"]]))
cat("\nComplete muts for Stability for", gene, ":", complete_obs_ps)
color_coln = which(colnames(corr_df_ps) == "dst_mode")
#end = which(colnames(corr_df_ps) == drug)
#ncol_omit = 2
#corr_end = end-ncol_omit
# correlate every column that precedes dst_mode
corr_end = color_coln-1
#------------------------
# Output: stability corrP
#------------------------
corr_psP = paste0(outdir_images
                  , tolower(gene)
                  , "_corr_stability.svg" )
cat("Corr plot stability with coloured dots:", corr_psP)
svg(corr_psP, width = 15, height = 15)
my_corr_pairs(corr_data_all = corr_df_ps
              , corr_cols = colnames(corr_df_ps)[seq_len(corr_end)]
              , corr_method = "spearman"
              , colour_categ_col = colnames(corr_df_ps)[color_coln] #"dst_mode"
              , categ_colour = c("red", "blue")
              , density_show = FALSE
              , hist_col = "coral4"
              , dot_size = 1.6
              , ats = 1.5
              , corr_lab_size =2.5
              , corr_value_size = 1)
dev.off()
#===============
# CONSERVATION
#==============
# Columns for the conservation correlation plot; columns before "dst_mode" are
# correlated, "dst_mode" supplies the point colour, and the drug column is
# carried along but not plotted.
corr_conservation_cols = c( static_cols
                            , "ConSurf"
                            , "SNAP2"
                            , "PROVEAN"
                            , aff_dist_cols
                            , "dst_mode"
                            , drug)
if (all(corr_conservation_cols%in%colnames(corr_plotdf))){
  cat("PASS: all colnames exist for ConSurf-correlation")
}else{
  stop("Abort: all colnames DO NOT exist for ConSurf-correlation")
}
corr_df_cons = corr_plotdf[, corr_conservation_cols]
# Number of mutations with a non-missing odds ratio. The column is named
# "Log10(OR)"; the old lookup of `Log(OR)` matched no column, returned NULL and
# so always reported nrow() as the number of complete observations.
complete_obs_cons = sum(!is.na(corr_df_cons[["Log10(OR)"]]))
cat("\nComplete muts for Conservation for", gene, ":", complete_obs_cons)
color_coln = which(colnames(corr_df_cons) == "dst_mode")
# end = which(colnames(corr_df_cons) == drug)
# ncol_omit = 2
# corr_end = end-ncol_omit
# correlate every column that precedes dst_mode
corr_end = color_coln-1
#---------------------------
# Output: Conservation corrP
#----------------------------
corr_consP = paste0(outdir_images
                    , tolower(gene)
                    , "_corr_conservation.svg" )
cat("Corr plot conservation coloured dots:", corr_consP)
svg(corr_consP, width = 10, height = 10)
my_corr_pairs(corr_data_all = corr_df_cons
              , corr_cols = colnames(corr_df_cons)[seq_len(corr_end)]
              , corr_method = "spearman"
              , colour_categ_col = colnames(corr_df_cons)[color_coln] #"dst_mode"
              , categ_colour = c("red", "blue")
              , density_show = FALSE
              , hist_col = "coral4"
              , dot_size =1.1
              , ats = 1.5
              , corr_lab_size = 1.8
              , corr_value_size = 1)
dev.off()
#####################################################
#DistCutOff = 10
#LigDist_colname # = "ligand_distance" # from globals
#ppi2Dist_colname = "interface_dist"
#naDist_colname = "TBC"
#####################################################
#================
# ligand affinity
#================
# Columns for the ligand-affinity correlation plot ("dst_mode" = point colour)
corr_lig_colnames = c(static_cols
                      , "mCSM-lig"
                      , "mmCSM-lig"
                      , "dst_mode")
#, drug)
# "Lig-Dist" is needed for the distance filter, so it is checked as well
if (all(c(corr_lig_colnames, "Lig-Dist")%in%colnames(corr_plotdf))){
  cat("PASS: all colnames exist for Lig-correlation")
}else{
  stop("Abort: all colnames DO NOT exist for Lig-correlation")
}
# Keep only mutations closer than DistCutOff to the ligand AND only the plotted
# columns, in a single step. Previously the distance-filtered data frame was
# overwritten by a column selection on the unfiltered corr_plotdf, so the filter
# never took effect. which() also drops rows whose distance is NA.
corr_df_lig = corr_plotdf[which(corr_plotdf[["Lig-Dist"]] < DistCutOff)
                          , corr_lig_colnames]
# Number of mutations with a non-missing odds ratio (the column is "Log10(OR)";
# the old `Log(OR)` lookup returned NULL and always reported nrow())
complete_obs_lig = sum(!is.na(corr_df_lig[["Log10(OR)"]]))
cat("\nComplete muts for lig affinity for", gene, ":", complete_obs_lig)
color_coln = which(colnames(corr_df_lig) == "dst_mode")
# end = which(colnames(corr_df_lig) == drug)
# ncol_omit = 2
# corr_end = end-ncol_omit
# correlate every column that precedes dst_mode
corr_end = color_coln-1
#------------------------
# Output: ligand corrP
#------------------------
corr_ligP = paste0(outdir_images
                   , tolower(gene)
                   , "_corr_lig.svg" )
cat("Corr plot affinity with coloured dots:", corr_ligP)
svg(corr_ligP, width = 10, height = 10)
my_corr_pairs(corr_data_all = corr_df_lig
              , corr_cols = colnames(corr_df_lig)[seq_len(corr_end)]
              , corr_method = "spearman"
              , colour_categ_col = colnames(corr_df_lig)[color_coln] #"dst_mode"
              , categ_colour = c("red", "blue")
              , density_show = FALSE
              , hist_col = "coral4"
              , dot_size = 2
              , ats = 1.5
              , corr_lab_size =3
              , corr_value_size = 1)
dev.off()
####################################################
#================
# ppi2 affinity
#================
# mCSM-PPI2 correlation plot, only for genes listed in geneL_ppi2
if (tolower(gene)%in%geneL_ppi2){
  # Columns before "dst_mode" are correlated; the drug column is carried along
  corr_ppi2_colnames = c(static_cols
                         , "mCSM-PPI2"
                         , "dst_mode"
                         , drug)
  # "PPI-Dist" is needed for the distance filter, so it is checked as well
  if (all(c(corr_ppi2_colnames, "PPI-Dist")%in%colnames(corr_plotdf))){
    cat("PASS: all colnames exist for mcsm-ppi2 correlation")
  }else{
    stop("Abort: all colnames DO NOT exist for mcsm-ppi2 correlation")
  }
  # Keep only mutations closer than DistCutOff to the interface AND only the
  # plotted columns, in a single step. Previously the distance-filtered data
  # frame was overwritten by a column selection on the unfiltered corr_plotdf,
  # so the filter never took effect. which() also drops rows with NA distance.
  corr_df_ppi2 = corr_plotdf[which(corr_plotdf[["PPI-Dist"]] < DistCutOff)
                             , corr_ppi2_colnames]
  # Number of mutations with a non-missing odds ratio (the column is "Log10(OR)";
  # the old `Log(OR)` lookup returned NULL and always reported nrow())
  complete_obs_ppi2 = sum(!is.na(corr_df_ppi2[["Log10(OR)"]]))
  cat("\nComplete muts for ppi2 affinity for", gene, ":", complete_obs_ppi2)
  color_coln = which(colnames(corr_df_ppi2) == "dst_mode")
  # end = which(colnames(corr_df_ppi2) == drug)
  # ncol_omit = 2
  # corr_end = end-ncol_omit
  # correlate every column that precedes dst_mode
  corr_end = color_coln-1
  #------------------------
  # Output: ppi2 corrP
  #------------------------
  corr_ppi2P = paste0(outdir_images
                      , tolower(gene)
                      , "_corr_ppi2.svg" )
  cat("Corr plot ppi2 with coloured dots:", corr_ppi2P)
  svg(corr_ppi2P, width = 10, height = 10)
  my_corr_pairs(corr_data_all = corr_df_ppi2
                , corr_cols = colnames(corr_df_ppi2)[seq_len(corr_end)]
                , corr_method = "spearman"
                , colour_categ_col = colnames(corr_df_ppi2)[color_coln] #"dst_mode"
                , categ_colour = c("red", "blue")
                , density_show = FALSE
                , hist_col = "coral4"
                , dot_size = 2
                , ats = 1.5
                , corr_lab_size = 3
                , corr_value_size = 1)
  dev.off()
}
# FIXME: ADD distance
#==================
# mCSM-NA affinity
#==================
# Correlation plot for nucleic-acid affinity, only for genes listed in geneL_na
if (tolower(gene)%in%geneL_na){
  # Columns before "dst_mode" are correlated; the drug column is carried along
  corr_na_colnames = c(static_cols
                       , "mCSM-NA"
                       , "dst_mode"
                       , drug)
  if (all(corr_na_colnames%in%colnames(corr_plotdf))){
    cat("PASS: all colnames exist for mcsm-NA-correlation")
  }else{
    stop("Abort: all colnames DO NOT exist for mcsm-NA-correlation")
  }
  # Restrict to mutations closer than DistCutOff to the nucleic acid. The old
  # code filtered corr_df_na by itself before it had been created and then
  # overwrote the result with the unfiltered corr_plotdf. The filter is now
  # applied to corr_plotdf; which() drops rows whose distance is NA.
  if ("NCA-Dist"%in%colnames(corr_plotdf)){
    na_rows = which(corr_plotdf[["NCA-Dist"]] < DistCutOff)
  }else{
    # distance column not available yet (see FIXME above): keep all mutations
    warning("'NCA-Dist' not found: mCSM-NA correlation uses all mutations"
            , call. = FALSE)
    na_rows = seq_len(nrow(corr_plotdf))
  }
  corr_df_na = corr_plotdf[na_rows, corr_na_colnames]
  # Number of mutations with a non-missing odds ratio (the column is "Log10(OR)";
  # the old `Log(OR)` lookup returned NULL and always reported nrow())
  complete_obs_na = sum(!is.na(corr_df_na[["Log10(OR)"]]))
  cat("\nComplete muts for NA affinity for", gene, ":", complete_obs_na)
  color_coln = which(colnames(corr_df_na) == "dst_mode")
  # end = which(colnames(corr_df_na) == drug)
  # ncol_omit = 2
  # corr_end = end-ncol_omit
  # correlate every column that precedes dst_mode
  corr_end = color_coln-1
  #------------------------
  # Output: mCSM-NA corrP
  #------------------------
  corr_naP = paste0(outdir_images
                    , tolower(gene)
                    , "_corr_na.svg" )
  cat("Corr plot mCSM-NA with coloured dots:", corr_naP)
  svg(corr_naP, width = 10, height = 10)
  my_corr_pairs(corr_data_all = corr_df_na
                , corr_cols = colnames(corr_df_na)[seq_len(corr_end)]
                , corr_method = "spearman"
                , colour_categ_col = colnames(corr_df_na)[color_coln] #"dst_mode"
                , categ_colour = c("red", "blue")
                , density_show = FALSE
                , hist_col = "coral4"
                , dot_size = 2
                , ats = 1.5
                , corr_lab_size = 3
                , corr_value_size = 1)
  dev.off()
}
####################################################
#===============
#ggpairs:
#================
#corr_df_ps$dst_mode = ifelse(corr_df_cons$dst_mode=="1", "R", "S")
# Draft ggpairs matrix of the stability correlation data, written to a
# scratch SVG. All columns except the last are plotted; points are coloured
# by dst_mode (0 -> "S", otherwise "R").
svg("/tmp/foo.svg", width = 10, height = 10)

corr_plotting_df <- corr_df_ps

# print() explicitly: a ggpairs object is only drawn by auto-printing at
# top level, which does not happen when this file is run via source(), and
# the device would then be closed on an empty page.
print(
  ggpairs(corr_plotting_df,
          columns = 1:(ncol(corr_plotting_df) - 1),
          upper = list(
            continuous = wrap("cor",
                              method = "spearman",
                              title = "ρ",
                              digits = 2,
                              title_args = c(colour = "black"))
          ),
          lower = list(
            continuous = wrap("points", alpha = 0.7, size = 0.5),
            combo = wrap("dot", alpha = 0.7, size = 0.5)
          ),
          # bare column name instead of corr_plotting_df$dst_mode: the
          # mapping is evaluated against the plotted data itself
          mapping = aes(colour = factor(ifelse(dst_mode == 0, "S", "R")),
                        alpha = 0.5),
          title = "Stability") +
    scale_colour_manual(values = c("red", "blue")) +
    scale_fill_manual(values = c("red", "blue")) +
    theme(text = element_text(size = 12, face = "bold"))
)

dev.off()
#

View file

@ -0,0 +1,164 @@
# source dm_om_plots.R
#============
# Plot labels
#============
# Section headings shown above each group of panels in the combined figure
tit1 <- "Stability changes"
tit2 <- "Genomic measure"
tit3 <- "Distance to partners"
tit4 <- "Evolutionary Conservation"
tit5 <- "Affinity changes"

# Font size shared by all section headings
pt_size <- 30

# theme_gray() with a "sans" base family and a bold plot title.
# Any extra arguments are passed straight through to theme_gray().
theme_georgia <- function(...) {
  bold_title <- theme(plot.title = element_text(face = "bold"))
  theme_gray(base_family = "sans", ...) + bold_title
}

# Resolved plot.title element; supplies the font family and face used below
title_theme <- calc_element("plot.title", theme_georgia())

# One label-only drawing per heading (pt1..pt5 match tit1..tit5)
pt1 <- ggdraw() + draw_label(tit1,
                             fontfamily = title_theme$family,
                             fontface = title_theme$face,
                             size = pt_size)

pt2 <- ggdraw() + draw_label(tit2,
                             fontfamily = title_theme$family,
                             fontface = title_theme$face,
                             size = pt_size)

pt3 <- ggdraw() + draw_label(tit3,
                             fontfamily = title_theme$family,
                             fontface = title_theme$face,
                             size = pt_size)

pt4 <- ggdraw() + draw_label(tit4,
                             fontfamily = title_theme$family,
                             fontface = title_theme$face,
                             size = pt_size)

pt5 <- ggdraw() + draw_label(tit5,
                             fontfamily = title_theme$family,
                             fontface = title_theme$face,
                             size = pt_size)
#======================
# Output plot function
#======================
# Assemble the combined figure: a top row (stability, genomics, distance)
# and a bottom row (conservation, affinity), each group headed by its
# section title.
#
# x: unused; kept so existing calls with or without an argument still work.
#
# Reads these objects from the enclosing/global environment:
#   titles : pt1, pt2, pt3, pt4, pt5
#   plots  : duetP, foldxP, deepddgP, dynamut2P, genomicsP,
#            distanceP_lig, distanceP_na,
#            consurfP, proveanP, snap2P,
#            mcsmligP, mcsmlig2P, mcsmnaP
#
# Returns the cowplot grid object; the caller is responsible for drawing it.
OutPlot_dm_om <- function(x) {

  # vertical share of: section title vs. the row of plots below it
  relH_tp <- c(0.08, 0.92)
  my_label_size <- 25

  # Stack one section title above a single row of labelled plots
  titled_row <- function(title_grob, plot_row, row_labels) {
    cowplot::plot_grid(
      title_grob,
      cowplot::plot_grid(plotlist = plot_row
                         , nrow = 1
                         , labels = row_labels
                         , label_size = my_label_size)
      , ncol = 1
      , rel_heights = relH_tp
    )
  }

  #----------------
  # Top panel (NULL entries are thin spacers between groups)
  #----------------
  top_panel <- cowplot::plot_grid(
    titled_row(pt1
               , list(duetP, foldxP, deepddgP, dynamut2P)
               , c("A", "B", "C", "D")),
    NULL,
    titled_row(pt2
               , list(genomicsP)
               , c("E")),
    NULL,
    titled_row(pt3
               , list(distanceP_lig
                      #, distanceP_ppi2
                      , distanceP_na)
               , c("F", "G")),
    nrow = 1,
    rel_widths = c(2/7, 0.1/7, 0.5/7, 0.1/7, 1/7)
  )

  #----------------
  # Bottom panel
  #----------------
  bottom_panel <- cowplot::plot_grid(
    titled_row(pt4
               , list(consurfP, proveanP, snap2P)
               , c("H", "I", "J")),
    NULL,
    titled_row(pt5
               , list(mcsmligP, mcsmlig2P
                      #, mcsmppi2P
                      , mcsmnaP)
               , c("K", "L", "M")),
    NULL,
    nrow = 1,
    rel_widths = c(3/6, 0.1/6, 3/6, 0.1/6)
  )

  #-------------------------------
  # combine: Top and Bottom panel
  #-------------------------------
  # Two rows in one column: the relative sizes belong in rel_heights
  # (the original passed them as rel_widths, which has no meaning for a
  # single column).
  cowplot::plot_grid(top_panel, bottom_panel
                     , nrow = 2
                     , rel_heights = c(1, 1)
                     , align = "hv")
}
#=====================
# OutPlot: svg and png
#======================
# Write the combined figure twice: as SVG and as a 300 dpi PNG.
# The plot object is print()ed explicitly: relying on top-level
# auto-printing leaves both files empty when this script is run through
# source(), because source() does not auto-print by default.
dm_om_combinedP <- paste0(outdir_images
                          , tolower(gene)
                          , "_dm_om_all.svg")

cat("DM OM plots with stats:", dm_om_combinedP)
svg(dm_om_combinedP, width = 32, height = 18)
print(OutPlot_dm_om())
dev.off()

dm_om_combinedP_png <- paste0(outdir_images
                              , tolower(gene)
                              , "_dm_om_all.png")

cat("DM OM plots with stats:", dm_om_combinedP_png)
png(dm_om_combinedP_png, width = 32, height = 18, units = "in", res = 300)
print(OutPlot_dm_om())
dev.off()

View file

@ -0,0 +1,56 @@
# Tweak for layout, fonts, and text sizes.
#svg('~/tmp/foo.svg', width=10, height=10, )
# Set the width/height to inches for print. 300 dpi is reasonably ok for "draft"
# output. To raise quality while preserving sanity, increase 'res' and
# DO NOT alter font/point/line sizes
#- [X] Black text for "Corr:" or replace with Rho symbol
#- [X] 0/1 == R/S
#- [X] "rho" symbol instead of "Corr:" text
#- [X] Dot size a bit smaller
#- [X] Plot lines slightly thinner
#
#
# Draft ggpairs matrix written to a scratch PNG (10 x 10 in, 300 dpi).
png("~/tmp/foo.png",
    width = 10,
    height = 10,
    units = "in",
    res = 300)

#corr_plotting_df = corr_df_ps
colnames(corr_plotdf)

# Drop the columns that are not wanted in this matrix.
# NOTE(review): the drug column is hard-coded as `ethambutol`, so this
# only works for a config whose drug is ethambutol -- confirm before reuse.
corr_plotting_df <- subset(corr_plotdf
                           , select = -c(ethambutol, `Log10(OR)`, `-Log10(P)`
                                         , ASA, RSA, KD, RD
                                         , FoldX
                                         , DeepDDG
                                         , Dynamut2))
colnames(corr_plotting_df)

# print() explicitly so the matrix is also drawn when the file is run via
# source(), where top-level values are not auto-printed.
#ggpairs(corr_plotting_df, columns = 1:(ncol(corr_plotting_df)-1),
print(
  ggpairs(corr_plotting_df,
          columns = 1:(ncol(corr_plotting_df)),
          upper = list(
            continuous = wrap("cor",
                              method = "spearman",
                              use = "pairwise.complete.obs",
                              title = "ρ",
                              digits = 2,
                              title_args = c(colour = "black"))
          ),
          lower = list(
            continuous = wrap("points", alpha = 0.7, size = 0.5),
            combo = wrap("dot", alpha = 0.7, size = 0.5)
          ),
          # dst_mode 0 is labelled "S", anything else "R"
          mapping = aes(colour = factor(ifelse(dst_mode == 0, "S", "R")),
                        alpha = 0.5),
          title = "Stability") +
    scale_colour_manual(values = c("red", "blue")) +
    scale_fill_manual(values = c("red", "blue")) +
    theme(text = element_text(size = 12, face = "bold"))
)

dev.off()
#Check all plots with LSHTM_analysis/scripts/plotting/plotting_colnames.R

View file

@ -0,0 +1,175 @@
#source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
# Build a ggpairs correlation matrix coloured by dst_mode.
#
# plot_df      : data frame to plot; every column except the last one is
#                shown, and it must contain `dst_mode`, which drives the
#                colour (0 -> "S", otherwise "R"). Callers in this file
#                place dst_mode last.
# plot_title   : title of the matrix
# tt_args_size : text size of the "ρ" title in the correlation panels
# gp_args_size : text size of the per-group values in those panels
#
# Returns the ggpairs object with manual red/blue colour and fill scales.
my_gg_pairs <- function(plot_df, plot_title
                        , tt_args_size = 2.5
                        , gp_args_size = 2.5) {

  last_plotted <- ncol(plot_df) - 1

  # upper triangle: Spearman correlation on pairwise-complete observations
  cor_panel <- wrap("cor", # ggally_cor()
                    method = "spearman",
                    use = "pairwise.complete.obs",
                    title = "ρ",
                    digits = 2,
                    justify_labels = "centre",
                    title_args = list(size = tt_args_size, colour = "black"),
                    group_args = list(size = gp_args_size))

  # lower triangle: small semi-transparent points / dots
  points_panel <- wrap("points", alpha = 0.7, size = 0.125)
  dot_panel <- wrap("dot", alpha = 0.7, size = 0.125)

  dst_colouring <- aes(colour = factor(ifelse(dst_mode == 0, "S", "R")),
                       alpha = 0.5)

  pairs_matrix <- ggpairs(plot_df,
                          columns = 1:last_plotted,
                          upper = list(continuous = cor_panel),
                          lower = list(continuous = points_panel,
                                       combo = dot_panel),
                          mapping = dst_colouring,
                          title = plot_title)

  pairs_matrix +
    scale_colour_manual(values = c("red", "blue")) +
    scale_fill_manual(values = c("red", "blue")) #+
  # theme(text = element_text(size=7,
  # face="bold"))
}
# Distance cut-off: affinity values of residues further away than this
# from the relevant partner are masked (set to 0) below
DistCutOff <- 10
###########################################################################
# Gene groups deciding which affinity columns apply
geneL_normal <- c("pnca")
geneL_na <- c("gid", "rpob")
geneL_ppi2 <- c("alr", "embb", "katg", "rpob")

merged_df3 <- as.data.frame(merged_df3)

corr_plotdf <- corr_data_extract(merged_df3
                                 , gene = gene
                                 , drug = drug
                                 , extract_scaled_cols = FALSE)

# every column whose name contains "Dist"
aff_dist_cols <- colnames(corr_plotdf)[grep("Dist", colnames(corr_plotdf))]

static_cols <- c("Log10(MAF)"
                 , "Log10(OR)")
############################################################
#=============================================
# Creating masked df for affinity data
#=============================================
corr_affinity_df <- corr_plotdf

#----------------------
# Mask affinity columns
#-----------------------
# The row masks are plain logical vectors with NA mapped to FALSE:
# sub-assignment into a data frame aborts on NA in the row index, so a
# missing distance now simply leaves the value unmasked.
lig_dist <- corr_affinity_df[, "Lig-Dist"]
beyond_lig <- !is.na(lig_dist) & lig_dist > DistCutOff
corr_affinity_df[beyond_lig, "mCSM-lig"] <- 0
corr_affinity_df[beyond_lig, "mmCSM-lig"] <- 0

if (tolower(gene) %in% geneL_ppi2) {
  ppi_dist <- corr_affinity_df[, "PPI-Dist"]
  beyond_ppi <- !is.na(ppi_dist) & ppi_dist > DistCutOff
  corr_affinity_df[beyond_ppi, "mCSM-PPI2"] <- 0
}

# if (tolower(gene)%in%geneL_na){
# corr_affinity_df[corr_affinity_df["NCA-Dist"]>DistCutOff,"mCSM-NA"]=0
# }

# count 0
#res <- colSums(corr_affinity_df==0)/nrow(corr_affinity_df)*100
# Per column: number of rows that are not exactly 0. na.rm = TRUE keeps the
# count defined for columns containing NA (NA rows count as "not 0").
unmasked_vals <- nrow(corr_affinity_df) -
  colSums(corr_affinity_df == 0, na.rm = TRUE)
unmasked_vals
##########################################################
#================
# Stability
#================
# Stability: static columns, the four stability columns, all distance
# columns, and dst_mode last (my_gg_pairs colours by it and leaves the last
# column out of the plotted set).
corr_ps_colnames <- c(static_cols,
                      "DUET", "FoldX", "DeepDDG", "Dynamut2",
                      aff_dist_cols,
                      "dst_mode")

corr_df_ps <- corr_plotdf[, corr_ps_colnames]

# Plot #1
plot_corr_df_ps <- my_gg_pairs(corr_df_ps,
                               plot_title = "Stability estimates")

##########################################################
#================
# Conservation
#================
# Same layout as above, without the distance columns
corr_conservation_cols <- c(static_cols,
                            "ConSurf", "SNAP2", "PROVEAN",
                            #, aff_dist_cols
                            "dst_mode")

corr_df_cons <- corr_plotdf[, corr_conservation_cols]

# Plot #2
plot_corr_df_cons <- my_gg_pairs(corr_df_cons,
                                 plot_title = "Conservation estimates")
##########################################################
#================
# Affinity: lig, ppi and na as applicable
#================
#corr_df_lig = corr_plotdf[corr_plotdf["Lig-Dist"]<DistCutOff,]
# Ligand affinity columns shared by every gene
common_aff_colnames <- c("mCSM-lig"
                         , "mmCSM-lig")

# Refuse genes that belong to none of the known groups rather than
# plotting with an undefined or stale column list
if (!tolower(gene) %in% c(geneL_normal, geneL_ppi2, geneL_na)) {
  stop("Abort: gene not found in geneL_normal, geneL_ppi2 or geneL_na")
}

# Add PPI2 and/or NA affinity as applicable. The columns are accumulated:
# a gene listed in both geneL_ppi2 and geneL_na (e.g. rpob) keeps both,
# whereas the original overwrote the PPI2 selection with the NA one.
aff_colnames <- common_aff_colnames
if (tolower(gene) %in% geneL_ppi2) {
  aff_colnames <- c(aff_colnames, "mCSM-PPI2")
}
if (tolower(gene) %in% geneL_na) {
  aff_colnames <- c(aff_colnames, "mCSM-NA")
}

# building final affinity colnames for correlation
corr_aff_colnames <- c(static_cols
                       , aff_colnames
                       , "dst_mode") # imp: must stay last, used for colouring

# taken from the distance-masked data frame, not from corr_plotdf
corr_df_aff <- corr_affinity_df[, corr_aff_colnames]
colnames(corr_df_aff)

# Plot #3
plot_corr_df_aff <- my_gg_pairs(corr_df_aff
                                , plot_title = "Affinity estimates"
                                , tt_args_size = 4
                                , gp_args_size = 4)
#=============
# combine
#=============
#png("/home/tanu/tmp/gg_pairs_all.png", height = 6, width=11.75, unit="in",res=300)
# Panels A and B (stability, conservation) side by side in one PNG.
# `units` is spelled out (the original relied on partial matching of
# `unit`), and the grid is print()ed so it is drawn under source() as well
# as when run interactively or with Rscript.
png(paste0(outdir_images
           , tolower(gene)
           , "_CorrAB.png")
    , height = 6, width = 11.75, units = "in", res = 300)

print(
  cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_ps),
                     ggmatrix_gtable(plot_corr_df_cons),
                     # ggmatrix_gtable(plot_corr_df_aff),
                     # nrow=1, ncol=3, rel_heights = 7,7,3
                     nrow = 1,
                     #rel_heights = 1,1
                     labels = "AUTO",
                     label_size = 12)
)
dev.off()

# affinity corr
#png("/home/tanu/tmp/gg_pairs_affinity.png", height =7, width=7, unit="in",res=300)
# Panel C (affinity) in its own PNG
png(paste0(outdir_images
           , tolower(gene)
           , "_CorrC.png")
    , height = 7, width = 7, units = "in", res = 300)

print(
  cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_aff),
                     labels = "C",
                     label_size = 12)
)
dev.off()

View file

@ -0,0 +1,78 @@
##############################################################
# PE count
##############################################################
# One coloured tile per category, labelled with the category and its count
rects <- data.frame(x = 1:6,
                    colors = c("#ffd700"   # gold
                               , "#f0e68c" # khaki
                               , "#da70d6" # orchid
                               , "#ff1493" # deeppink
                               , "#00BFC4" #, "#007d85" # blue
                               , "#F8766D" # red
                    ))
rects

rects$text <- c("-ve Lig"
                , "+ve Lig"
                , "+ve PPI2"
                , "-ve PPI2"
                , "+ve stability"
                , "-ve stability")

# FOR EMBB ONLY: counts are entered by hand and must be updated per gene
rects$numbers <- c(38, 0, 22, 9, 108, 681)
rects$num_labels <- paste0("n=", rects$numbers)
rects

#https://stackoverflow.com/questions/47986055/create-a-rectangle-filled-with-text
# Vertical strip of tiles.
# The original added coord_fixed() and then coord_flip(); a plot has a
# single coordinate system, so coord_flip() replaced coord_fixed() (with a
# "Coordinate system already present" message). Only the effective
# coord_flip() is kept, which leaves the drawn plot unchanged.
peP <- ggplot(rects, aes(x, y = 0, fill = colors
                         , label = paste0(text, "\n", num_labels))) +
  geom_tile(width = 1, height = 1) +       # unit-sized tiles
  geom_text(color = "black", size = 1.7) + # black label centred on each tile
  scale_fill_identity(guide = "none") +    # use the hex colours in `colors` as-is
  coord_flip() + scale_x_reverse() +       # stack top-to-bottom in row order
  # theme_void() # remove any axis markings
  theme_nothing()                          # remove any axis markings
peP

# Horizontal strip of square tiles
peP2 <- ggplot(rects, aes(x, y = 0, fill = colors
                          , label = paste0(text, "\n", num_labels))) +
  geom_tile() +                            # default-sized tiles
  geom_text(color = "black", size = 1.6) + # black label centred on each tile
  scale_fill_identity(guide = "none") +    # use the hex colours in `colors` as-is
  coord_fixed() +                          # keep the tiles square
  theme_nothing()                          # remove any axis markings
peP2
########################################################
# MANUAL process
#===============================
# Sensitivity count: Site
#==============================
table(df3$sensitivity)

#--------
# embb
#--------
#rsc = 54
#ccc = 46
#ssc = 470
# MANUAL step: rsc, ccc and ssc (the counts for the three site classes
# below) must be set by hand before this point; the embb values above are
# kept for reference only. Fail with a clear message instead of an
# "object not found" error from inside data.frame().
if (!(exists("rsc") && exists("ccc") && exists("ssc"))) {
  stop("Abort: set rsc, ccc and ssc (site counts) before building sens_siteP")
}

rect_rs_siteC <- data.frame(mutation_class = c("A_Resistant sites"
                                               , "B_Common sites"
                                               , "C_Sensitive sites"),
                            tile_colour = c("red",
                                            "purple",
                                            "blue"),
                            numbers = c(rsc, ccc, ssc),
                            order = c(1, 2, 3))

rect_rs_siteC$labels <- paste0(rect_rs_siteC$mutation_class
                               , "\nn="
                               , rect_rs_siteC$numbers)

# One tile per site class with its count on a white label.
# scale_fill_identity() makes the tiles use the literal colours stored in
# tile_colour; without it ggplot2 treats that column as a category and
# assigns its default palette, so the tiles were not red/purple/blue.
# geom_label() keeps its own constant white fill.
sens_siteP <- ggplot(rect_rs_siteC, aes(mutation_class, y = 0,
                                        fill = tile_colour,
                                        label = paste0("n=", numbers))) +
  geom_tile(width = 1, height = 1) +
  geom_label(color = "black", size = 1.7, fill = "white", alpha = 0.7) +
  scale_fill_identity(guide = "none") +
  theme_nothing()
sens_siteP

View file

@ -0,0 +1,251 @@
#!/usr/bin/env Rscript
#source("~/git/LSHTM_analysis/config/alr.R")
source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/config/katg.R")
#source("~/git/LSHTM_analysis/config/gid.R")
#source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/rpob.R")
# get plotting dfs
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
#=======
# output
#=======
# Per-gene output directory for the thesis figures
outdir_images <- paste0("~/git/Writing/thesis/images/results/",
                        tolower(gene),
                        "/")

###################################################################
# FIXME: ADD distance to NA when SP replies
# Gene groups by available affinity data
geneL_normal <- "pnca"
geneL_na <- c("gid", "rpob")
geneL_ppi2 <- c("alr", "embb", "katg", "rpob")

# LigDist_colname # from globals used
# ppi2Dist_colname #from globals used
# naDist_colname #from globals used

# Identifier/annotation columns, followed by the distance columns
# (dist_columns is defined outside this block)
common_cols <- c("mutationinformation",
                 "X5uhc_position",
                 "X5uhc_offset",
                 "position",
                 "dst_mode",
                 "mutation_info_labels",
                 "sensitivity",
                 dist_columns)
########################################
# Convert the categorical columns (names containing "_outcome" or "_info")
# of merged_df3 to factors.
categ_cols_to_factor <- grep("_outcome|_info", colnames(merged_df3))
fact_cols <- colnames(merged_df3)[categ_cols_to_factor]

# List-style indexing (merged_df3[fact_cols]) always yields a data frame,
# also when exactly one column matches; `merged_df3[, fact_cols]` would
# collapse to a vector in that case and lapply() would then iterate over
# its elements. vapply() with is.character()/is.factor() additionally
# copes with columns carrying more than one class (e.g. ordered factors),
# on which comparing class() with == fails.
has_character_col <- any(vapply(merged_df3[fact_cols]
                                , is.character
                                , logical(1)))

if (has_character_col) {
  cat("\nChanging", length(categ_cols_to_factor), "cols to factor")
  # as in the original: once any column is character, all matched columns
  # are converted
  merged_df3[fact_cols] <- lapply(merged_df3[fact_cols], as.factor)
  if (all(vapply(merged_df3[fact_cols], is.factor, logical(1)))) {
    cat("\nSuccessful: cols changed to factor")
  }
} else {
  cat("\nRequested cols aready factors")
}

cat("\ncols changed to factor are:\n"
    , colnames(merged_df3)[categ_cols_to_factor])
####################################
# merged_df3: NECESSARY pre-processing
###################################
#df3 = merged_df3
# all_cols has to exist before plot_cols is built from it. The original
# defined them the other way round, so plot_cols either failed with
# "object 'all_cols' not found" or silently picked up an older all_cols
# left over from a sourced script.
all_cols <- c(common_cols
              , all_stability_cols
              , all_affinity_cols
              , all_conserv_cols)

# NOTE(review): common_cols already contains the four names listed first,
# so plot_cols carries duplicates -- confirm whether that is intended.
plot_cols <- c("mutationinformation", "mutation_info_labels", "position"
               , "dst_mode"
               , all_cols)

# counting
# Working subset used for the tallies below
foo <- merged_df3[, c("mutationinformation"
                      , "wild_pos"
                      , "position"
                      , "sensitivity"
                      , "avg_lig_affinity"
                      , "avg_lig_affinity_scaled"
                      , "avg_lig_affinity_outcome"
                      , "ligand_distance"
                      , "ligand_affinity_change"
                      , "affinity_scaled"
                      , "ligand_outcome"
                      , "consurf_colour_rev"
                      , "consurf_outcome")]

table(foo$consurf_outcome)

# rows with ligand_distance below 10
foo2 <- foo[foo$ligand_distance < 10, ]
table(foo2$ligand_outcome)
#############################
# wide plots SNP
# DRUG
# Mutations at each set of site positions (aa_pos_* are defined outside
# this block), with per-position lookups for the positions of interest.

# DRUG
# The subset is stored as `drug_site_muts`: the original assigned it to
# `drug`, overwriting the global `drug` (the drug name from the config)
# that other plotting code reads.
length(aa_pos_drug); aa_pos_drug
drug_site_muts <- foo[foo$position %in% aa_pos_drug, ]
drug_site_muts$wild_pos
length(unique(drug_site_muts$position)); unique(drug_site_muts$position)
table(drug_site_muts$position)
drug_site_muts$mutationinformation[drug_site_muts$position == 306]
drug_site_muts$mutationinformation[drug_site_muts$position == 303]

#CA
length(aa_pos_ca); aa_pos_ca
ca <- foo[foo$position %in% aa_pos_ca, ]
ca$position; length(unique(ca$position))
table(ca$position)

# DSL
length(aa_pos_dsl); aa_pos_dsl
dsl <- foo[foo$position %in% aa_pos_dsl, ]
dsl$position; length(unique(dsl$position))
table(dsl$position)
dsl$mutationinformation[dsl$position == 330]
dsl$mutationinformation[dsl$position == 438]
dsl$mutationinformation[dsl$position == 439]
dsl$mutationinformation[dsl$position == 510]

# CDL
length(aa_pos_cdl); aa_pos_cdl
cdl <- foo[foo$position %in% aa_pos_cdl, ]
length(unique(cdl$position)); cdl$position;
table(cdl$position)
cdl$mutationinformation[cdl$position == 456]
cdl$mutationinformation[cdl$position == 521]
cdl$mutationinformation[cdl$position == 554]
cdl$mutationinformation[cdl$position == 568]
cdl$mutationinformation[cdl$position == 575]
cdl$mutationinformation[cdl$position == 580]
cdl$mutationinformation[cdl$position == 658]
cdl$mutationinformation[cdl$position == 665]
###############################################
# OR plot
# Odds-ratio table: one row per mutation with its OR, Fisher p-value and
# the distance columns.
bar <- merged_df3[, c("mutationinformation"
                      , "wild_pos"
                      , "position"
                      , "sensitivity"
                      , affinity_dist_colnames
                      , "or_mychisq"
                      , "pval_fisher"
                      #, "pval_chisq"
                      , "neglog_pval_fisher"
                      , "log10_or_mychisq")]

# Bonferroni alternative (disabled in the original):
# bar$p_adj_bonferroni = p.adjust(bar$pval_fisher, method = "bonferroni")
# followed by the same star coding in a `signif_bon` column.

# Benjamini-Hochberg adjusted p-values and their significance code
bar$p_adj_fdr <- p.adjust(bar$pval_fisher, method = "BH")
bar$signif_fdr <- bar$p_adj_fdr

# The "*" rule tested `signif_bon`, a column that only exists in the
# disabled Bonferroni code, so the call failed; it now tests signif_fdr
# like the other rules. Rows with a missing p-value fall through to 'ns'.
bar <- dplyr::mutate(bar
                     , signif_fdr = dplyr::case_when(
                       signif_fdr == 0.05 ~ "."
                       , signif_fdr <= 0.0001 ~ "****"
                       , signif_fdr <= 0.001 ~ "***"
                       , signif_fdr <= 0.01 ~ "**"
                       , signif_fdr < 0.05 ~ "*"
                       , TRUE ~ "ns"))

# sort df: highest OR first
bar <- bar[order(bar$or_mychisq, decreasing = TRUE), ]

bar <- bar[, c("mutationinformation"
               , "wild_pos"
               , "position"
               , "sensitivity"
               , affinity_dist_colnames
               , "or_mychisq"
               #, "pval_fisher"
               #, "pval_chisq"
               #, "neglog_pval_fisher"
               #, "log10_or_mychisq"
               #, "signif_bon"
               , "p_adj_fdr"
               , "signif_fdr")]

table(bar$sensitivity)

# OR > 1 and significant after FDR correction. signif_fdr holds the
# character codes, so it cannot be combined with `&` (the original errored
# here); the adjusted p-value itself is tested instead.
table(bar$or_mychisq > 1 & bar$p_adj_fdr < 0.05) # sen and res ~ OR
str(bar)

# OR < 1 / OR > 1, keeping only rows without any missing value
sen <- bar[bar$or_mychisq < 1, ]
sen <- na.omit(sen)

res <- bar[bar$or_mychisq > 1, ]
res <- na.omit(res)
# comp
# Mutations that have an odds ratio
bar_or <- bar[!is.na(bar$or_mychisq), ]
table(bar_or$sensitivity)

# split by odds ratio: below 1 vs above 1
sen1 <- bar_or[bar_or$or_mychisq < 1, ] # sen and res ~OR
res1 <- bar_or[bar_or$or_mychisq > 1, ] # sen and res ~OR

# sanity check: the two subsets must account for every row of bar_or
if (nrow(bar_or) != nrow(sen1) + nrow(res1)) {
  stop("Abort: df with or numbers mimatch")
} else {
  cat("\nPASS: df with or successfully sourced"
      , "\nCalculating % of muts with OR>1")
}

# percent for OR muts
pc_orR <- nrow(res1) / (nrow(sen1) + nrow(res1)); pc_orR
cat("\nPercentage of muts with OR>1 i.e resistant:"
    , pc_orR * 100)

# muts with highest OR
head(bar_or$mutationinformation, 10)

# sort df: OR, then ligand distance, then interface distance, all descending
bar_or <- bar_or[order(bar_or$or_mychisq
                       , bar_or$ligand_distance
                       , bar_or$interface_dist
                       , decreasing = TRUE), ]

# flag membership of each site set
bar_or$drug_site <- ifelse(bar_or$position %in% aa_pos_drug, "drug", "no")
table(bar_or$drug_site)

bar_or$dsl_site <- ifelse(bar_or$position %in% aa_pos_dsl, "dsl", "no")
table(bar_or$dsl_site)

bar_or$ca_site <- ifelse(bar_or$position %in% aa_pos_ca, "ca", "no")
table(bar_or$ca_site)

bar_or$cdl_site <- ifelse(bar_or$position %in% aa_pos_cdl, "cdl", "no")
table(bar_or$cdl_site)

# first ten rows after sorting
top10_or <- bar_or[1:10, ]

# are these active sites
top10_or$position[top10_or$position %in% active_aa_pos]

# closest most sig: within 10 of the ligand, nearest first, then highest OR
bar_or_lig <- bar_or[bar_or$ligand_distance < 10, ]
bar_or_lig <- bar_or_lig[order(bar_or_lig$ligand_distance
                               , -bar_or_lig$or_mychisq), ]
table(bar_or_lig$signif_fdr)

# same for the interface distance
bar_or_ppi <- bar_or[bar_or$interface_dist < 10, ]
bar_or_ppi <- bar_or_ppi[order(bar_or_ppi$interface_dist
                               , -bar_or_ppi$or_mychisq), ]
table(bar_or_ppi$signif_fdr)