LSHTM_analysis/scripts/plotting/plotting_thesis/alr/basic_barplots_alr.R
2022-08-23 21:54:16 +01:00

363 lines
14 KiB
R

#!/usr/bin/env Rscript
#########################################################
# TASK: Barplots
# basic barplots with outcome
# basic barplots with frequency of count of mutations
#########################################################
#=============
# Data: Input
#=============
# merged_df3, gene and the other globals used further down are not defined in
# this script; they are expected to come from the two files sourced here.
source("~/git/LSHTM_analysis/config/alr.R")
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
# cat("\nSourced plotting cols as well:", length(plotting_cols))
####################################################
# Report the class of the merged data (auto-printed when run at top level).
class(merged_df3)
# Working copy of the merged data without the pos_count column.
df3 <- subset(merged_df3, select = -pos_count)
#=======
# output
#=======
# Image directory for this gene (lower-cased gene name as the sub-folder).
outdir_images <- paste0(
  "~/git/Writing/thesis/images/results/",
  tolower(gene),
  "/"
)
cat("plots will output to:", outdir_images)
##########################################################
# Sizes handed to stability_count_bp() for the affinity / ConSurf barplots.
# The names mirror that function's arguments (sts, lts, ats, als, ltis,
# geom_ls); presumably text sizes for subtitle, legend and axes -- confirm
# against the function definition.
sts <- lts <- ats <- als <- ltis <- 8
geom_ls <- 2.2
# Sizes handed to site_snp_count_bp() for the site-count (pos_count) barplots.
subtitle_size <- leg_text_size <- axis_text_size <- axis_label_size <- 8
geom_ls_pc <- 2.2
###########################################################
#------------------------------
# plot default sizes
#------------------------------
#=========================
# Affinity outcome
# check this var: outcome_cols_affinity
# get from preformatting or put in globals
#==========================
# Echo the globals that drive the distance filtering (auto-printed when the
# script is run at top level).
DistCutOff
LigDist_colname # = "ligand_distance" # from globals
ppi2Dist_colname
naDist_colname
###########################################################
# get plotting data within the distance
# which() keeps only rows where the comparison is TRUE. Plain logical
# indexing would turn every NA distance into an all-NA row, and those rows
# would then be passed to the plotting functions below.
df3_lig <- df3[which(df3[[LigDist_colname]] < DistCutOff), ]
df3_ppi2 <- df3[which(df3[[ppi2Dist_colname]] < DistCutOff), ]
df3_na <- df3[which(df3[[naDist_colname]] < DistCutOff), ]
common_bp_title <- paste0("Sites <", DistCutOff, angstroms_symbol)
#------------------------------
# Ligand affinity barplot: counts per category of "ligand_outcome"
# Data: df3_lig (rows of df3 within DistCutOff of the ligand)
#------------------------------
mLigP <- stability_count_bp(
  plotdf = df3_lig,
  df_colname = "ligand_outcome",
  # leg_title = "mCSM-lig",
  # bp_plot_title = paste(common_bp_title, "ligand"),
  yaxis_title = "Number of nsSNPs",
  subtitle_text = "mCSM\nLig",
  subtitle_colour = "black",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  leg_position = "none",
  sts = sts,
  lts = lts,
  ats = ats,
  als = als,
  ltis = ltis,
  geom_ls = geom_ls
)
# Show the plot (auto-printed at top level).
mLigP
#------------------------------
# Ligand affinity barplot: counts per category of "mmcsm_lig_outcome"
# Data: df3_lig again, so the sites match the mCSM-lig plot; only the
# outcome column differs.
#------------------------------
mmLigP <- stability_count_bp(
  plotdf = df3_lig,
  df_colname = "mmcsm_lig_outcome",
  # leg_title = "mmCSM-lig",
  # label_categories = labels_mmlig,
  # bp_plot_title = paste(common_bp_title, "ligand"),
  yaxis_title = "",
  subtitle_text = "mmCSM\nLig",
  subtitle_colour = "black",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  leg_position = "none",
  sts = sts,
  lts = lts,
  ats = ats,
  als = als,
  ltis = ltis,
  geom_ls = geom_ls
)
# Show the plot (auto-printed at top level).
mmLigP
#------------------------------
# PPI2 affinity barplot: counts per category of "mcsm_ppi2_outcome"
# Data: df3_ppi2 (rows of df3 within DistCutOff of the interface)
# Only built when the current gene is listed in geneL_ppi2.
#------------------------------
if (tolower(gene) %in% geneL_ppi2) {
  ppi2P <- stability_count_bp(
    plotdf = df3_ppi2,
    df_colname = "mcsm_ppi2_outcome",
    # leg_title = "mCSM-ppi2",
    # label_categories = labels_ppi2,
    # bp_plot_title = paste(common_bp_title, "PP-interface"),
    yaxis_title = "Number of nsSNPs",
    subtitle_text = "mCSM\nPPI2",
    subtitle_colour = "black",
    bar_fill_values = c("#F8766D", "#00BFC4"),
    leg_position = "none",
    sts = sts,
    lts = lts,
    ats = ats,
    als = als,
    ltis = ltis,
    geom_ls = geom_ls
  )
  # Last value of the if-block, so it is still auto-printed at top level.
  ppi2P
}
#------------------------------
# Nucleic-acid affinity barplot: counts per category of "mcsm_na_outcome"
# Data: df3_na (rows of df3 within DistCutOff by the NA distance column)
# Only built when the current gene is listed in geneL_na.
#------------------------------
if (tolower(gene) %in% geneL_na) {
  nca_distP <- stability_count_bp(
    plotdf = df3_na,
    df_colname = "mcsm_na_outcome",
    # leg_title = "mCSM-NA",
    # label_categories =
    # bp_plot_title = paste(common_bp_title, "Dist to NA"),
    yaxis_title = "Number of nsSNPs",
    subtitle_text = "mCSM\nNA",
    subtitle_colour = "black",
    bar_fill_values = c("#F8766D", "#00BFC4"),
    leg_position = "none",
    sts = sts,
    lts = lts,
    ats = ats,
    als = als,
    ltis = ltis,
    geom_ls = geom_ls
  )
  # Last value of the if-block, so it is still auto-printed at top level.
  nca_distP
}
#####################################################################
# ------------------------------
# Site count barplot for the ligand subset (df3_lig):
# x axis "Number of nsSNPs", y axis "Number of Sites"
# ------------------------------
# Same value as the earlier common_bp_title assignment; kept as in the
# original script.
common_bp_title <- paste0("Sites <", DistCutOff, angstroms_symbol)
posC_lig <- site_snp_count_bp(
  plotdf = df3_lig,
  df_colname = "position",
  xaxis_title = "Number of nsSNPs",
  yaxis_title = "Number of Sites",
  subtitle_text = "",
  subtitle_colour = "chocolate4",
  subtitle_size = subtitle_size,
  leg_text_size = leg_text_size,
  axis_text_size = axis_text_size,
  axis_label_size = axis_label_size,
  geom_ls = geom_ls_pc
)
# Show the plot (auto-printed at top level).
posC_lig
#------------------------------
# Site count barplot for the PPI2 subset (df3_ppi2)
# Only built when the current gene is listed in geneL_ppi2.
#------------------------------
if (tolower(gene) %in% geneL_ppi2) {
  posC_ppi2 <- site_snp_count_bp(
    plotdf = df3_ppi2,
    df_colname = "position",
    xaxis_title = "Number of nsSNPs",
    yaxis_title = "Number of Sites",
    subtitle_text = "",
    subtitle_colour = "chocolate4",
    subtitle_size = subtitle_size,
    leg_text_size = leg_text_size,
    axis_text_size = axis_text_size,
    axis_label_size = axis_label_size,
    geom_ls = geom_ls_pc
  )
  # Last value of the if-block, so it is still auto-printed at top level.
  posC_ppi2
}
#------------------------------
# Site count barplot for the nucleic-acid subset (df3_na)
# Only built when the current gene is listed in geneL_na.
#------------------------------
if (tolower(gene) %in% geneL_na) {
  posC_nca <- site_snp_count_bp(
    plotdf = df3_na,
    df_colname = "position",
    xaxis_title = "Number of nsSNPs",
    yaxis_title = "Number of Sites",
    subtitle_text = "",
    subtitle_colour = "chocolate4",
    subtitle_size = subtitle_size,
    leg_text_size = leg_text_size,
    axis_text_size = axis_text_size,
    axis_label_size = axis_label_size,
    geom_ls = geom_ls_pc
  )
  # Last value of the if-block, so it is still auto-printed at top level.
  posC_nca
}
#===============================================================
#------------------------------
# Site count barplot for ALL rows of df3 (no distance filter applied)
#------------------------------
posC_all <- site_snp_count_bp(
  plotdf = df3,
  df_colname = "position",
  xaxis_title = "Number of nsSNPs",
  yaxis_title = "Number of Sites",
  subtitle_text = "All mutations sites",
  subtitle_colour = "chocolate4",
  subtitle_size = subtitle_size,
  leg_text_size = leg_text_size,
  axis_text_size = axis_text_size,
  axis_label_size = axis_label_size,
  geom_ls = geom_ls_pc
)
# Show the plot (auto-printed at top level).
posC_all
##################################################################
# ConSurf barplot: counts per category of "consurf_outcome" over all of df3.
# The only barplot in this section that places a legend (at the top).
consurfP <- stability_count_bp(
  plotdf = df3,
  df_colname = "consurf_outcome",
  # leg_title = "ConSurf",
  # label_categories = labels_consurf,
  yaxis_title = "Number of nsSNPs",
  subtitle_text = "ConSurf",
  subtitle_colour = "black",
  bar_fill_values = consurf_colours, # from globals
  leg_position = "top",
  sts = sts,
  lts = lts,
  ats = ats,
  als = als,
  ltis = ltis,
  geom_ls = geom_ls
)
# Show the plot (auto-printed at top level).
consurfP
##############################################################
# Larger sizes handed to stability_count_bp() for the stability and
# sequence-based outcome barplots that follow (the "_so" set).
sts_so <- lts_so <- ats_so <- als_so <- ltis_so <- 10
geom_ls_so <- 2.5
#===================
# Stability
#===================
# mCSM-DUET: counts per category of "duet_outcome" over all of df3
duetP <- stability_count_bp(
  plotdf = df3,
  df_colname = "duet_outcome",
  leg_title = "mCSM-DUET",
  # label_categories = labels_duet,
  yaxis_title = "Number of nsSNPs",
  subtitle_text = "mCSM-DUET",
  subtitle_colour = "black",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  leg_position = "none",
  sts = sts_so,
  lts = lts_so,
  ats = ats_so,
  als = als_so,
  ltis = ltis_so,
  geom_ls = geom_ls_so
)
# Show the plot (auto-printed at top level).
duetP
# FoldX: counts per category of "foldx_outcome" over all of df3
foldxP <- stability_count_bp(
  plotdf = df3,
  df_colname = "foldx_outcome",
  # leg_title = "FoldX",
  # label_categories = labels_foldx,
  yaxis_title = "",
  subtitle_text = "FoldX",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  leg_position = "none",
  sts = sts_so,
  lts = lts_so,
  ats = ats_so,
  als = als_so,
  ltis = ltis_so,
  geom_ls = geom_ls_so
)
# Show the plot (auto-printed at top level).
foldxP
# DeepDDG: counts per category of "deepddg_outcome" over all of df3
deepddgP <- stability_count_bp(
  plotdf = df3,
  df_colname = "deepddg_outcome",
  # leg_title = "DeepDDG",
  # label_categories = labels_deepddg,
  yaxis_title = "",
  subtitle_text = "DeepDDG",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  leg_position = "none",
  sts = sts_so,
  lts = lts_so,
  ats = ats_so,
  als = als_so,
  ltis = ltis_so,
  geom_ls = geom_ls_so
)
# Show the plot (auto-printed at top level).
deepddgP
# Dynamut2: counts per category of "ddg_dynamut2_outcome" over all of df3
dynamut2P <- stability_count_bp(
  plotdf = df3,
  df_colname = "ddg_dynamut2_outcome",
  # leg_title = "Dynamut2",
  # label_categories = labels_ddg_dynamut2_outcome,
  yaxis_title = "",
  subtitle_text = "Dynamut2",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  leg_position = "none",
  sts = sts_so,
  lts = lts_so,
  ats = ats_so,
  als = als_so,
  ltis = ltis_so,
  geom_ls = geom_ls_so
)
# Show the plot (auto-printed at top level).
dynamut2P
# PROVEAN: counts per category of "provean_outcome" over all of df3
proveanP <- stability_count_bp(
  plotdf = df3,
  df_colname = "provean_outcome",
  # leg_title = "PROVEAN",
  # label_categories = labels_provean,
  yaxis_title = "Number of nsSNPs",
  subtitle_text = "PROVEAN",
  bar_fill_values = c("#D01C8B", "#F1B6DA"), # deep pink, then light pink
  leg_position = "none", # top
  sts = sts_so,
  lts = lts_so,
  ats = ats_so,
  als = als_so,
  ltis = ltis_so,
  geom_ls = geom_ls_so
)
# Show the plot (auto-printed at top level).
proveanP
# SNAP2: counts per category of "snap2_outcome" over all of df3
snap2P <- stability_count_bp(
  plotdf = df3,
  df_colname = "snap2_outcome",
  # leg_title = "SNAP2",
  # label_categories = labels_snap2,
  yaxis_title = "",
  subtitle_text = "SNAP2",
  bar_fill_values = c("#D01C8B", "#F1B6DA"), # deep pink, then light pink
  leg_position = "none", # top
  sts = sts_so,
  lts = lts_so,
  ats = ats_so,
  als = als_so,
  ltis = ltis_so,
  geom_ls = geom_ls_so
)
# Show the plot (auto-printed at top level).
snap2P
#####################################################################################