372 lines
14 KiB
R
372 lines
14 KiB
R
#!/usr/bin/env Rscript
|
|
#########################################################
|
|
# TASK: Barplots
|
|
# basic barplots with outcome
|
|
# basic barplots with frequency of count of mutations
|
|
#########################################################
|
|
#=============
|
|
# Data: Input
|
|
#==============
|
|
#source("~/git/LSHTM_analysis/config/pnca.R")
|
|
#source("~/git/LSHTM_analysis/config/embb.R")
|
|
#source("~/git/LSHTM_analysis/config/gid.R")
|
|
|
|
#source("~/git/LSHTM_analysis/config/alr.R")
|
|
#source("~/git/LSHTM_analysis/config/katg.R")
|
|
#source("~/git/LSHTM_analysis/config/rpob.R")
|
|
|
|
#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
|
#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") sourced by above
|
|
|
|
#cat("\nSourced plotting cols as well:", length(plotting_cols))
|
|
|
|
####################################################
|
|
# Show the class of the merged input (auto-printed at top level).
class(merged_df3)

# Working data frame: merged_df3 with the pos_count column removed.
df3 <- subset(merged_df3, select = -pos_count)
|
|
|
|
#=======
|
|
# output
|
|
#=======
|
|
# Output directory for the images, built from the lower-cased gene name.
outdir_images <- paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")

# End the message with a newline so that whatever is printed next starts on
# its own console line instead of being glued to the path.
cat("plots will output to:", outdir_images, "\n")
|
|
|
|
##########################################################
|
|
# blue, red bp
|
|
# Sizes handed to stability_count_bp() by the outcome barplots below.
# NOTE(review): the abbreviations look like subtitle / legend-text /
# axis-text / axis-label / legend-title sizes — confirm against the helper.
sts <- lts <- ats <- als <- ltis <- 8
geom_ls <- 2.2

# Sizes handed to site_snp_count_bp() by the position-count barplots below.
subtitle_size <- leg_text_size <- axis_text_size <- axis_label_size <- 8
geom_ls_pc <- 2.2
|
|
|
|
###########################################################
|
|
#------------------------------
|
|
# plot default sizes
|
|
#------------------------------
|
|
#=========================
|
|
# Affinity outcome
|
|
# check this var: outcome_cols_affinity
|
|
# get from preformatting or put in globals
|
|
#==========================
|
|
# Echo the cutoff and the distance column names. They must already be
# defined at this point; an undefined name stops the script here.
DistCutOff
LigDist_colname # = "ligand_distance" # from globals
ppi2Dist_colname
naDist_colname

###########################################################
# get plotting data within the distance
# which() keeps only rows where the comparison is TRUE. A bare logical
# index would turn every NA distance into an all-NA row in the subset,
# and those rows would then be fed to the plots.
df3_lig <- df3[which(df3[[LigDist_colname]] < DistCutOff), ]
df3_ppi2 <- df3[which(df3[[ppi2Dist_colname]] < DistCutOff), ]
df3_na <- df3[which(df3[[naDist_colname]] < DistCutOff), ]

# Shared title prefix, e.g. "Sites <" followed by the cutoff and the
# angstrom symbol.
common_bp_title <- paste0("Sites <", DistCutOff, angstroms_symbol)
|
|
|
|
#------------------------------
|
|
# barplot for ligand affinity:
|
|
# <10 Ang of ligand
|
|
#------------------------------
|
|
# Barplot of the "ligand_outcome" column for the rows within the ligand
# distance cutoff (df3_lig). Legend hidden; subtitle reads "mCSM / Lig".
mLigP <- stability_count_bp(
  plotdf = df3_lig,
  df_colname = "ligand_outcome",
  # leg_title = "mCSM-lig",
  # bp_plot_title = paste(common_bp_title, "ligand"),
  yaxis_title = "Number of nsSNPs",
  leg_position = "none",
  subtitle_text = "mCSM\nLig",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  subtitle_colour = "black",
  sts = sts, lts = lts, ats = ats, als = als, ltis = ltis,
  geom_ls = geom_ls
)

# Display the plot.
mLigP
|
|
#------------------------------
|
|
# barplot for ligand affinity:
|
|
# <10 Ang of ligand
|
|
# mmCSM-lig: will be the same no. of sites but the effect will be different
|
|
#------------------------------
|
|
# Barplot of the "mmcsm_lig_outcome" column on the same ligand-distance
# subset (df3_lig). The y-axis title is left blank.
mmLigP <- stability_count_bp(
  plotdf = df3_lig,
  df_colname = "mmcsm_lig_outcome",
  # leg_title = "mmCSM-lig",
  # label_categories = labels_mmlig,
  # bp_plot_title = paste(common_bp_title, "ligand"),
  yaxis_title = "",
  leg_position = "none",
  subtitle_text = "mmCSM\nLig",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  subtitle_colour = "black",
  sts = sts, lts = lts, ats = ats, als = als, ltis = ltis,
  geom_ls = geom_ls
)

# Display the plot.
mmLigP
|
|
#------------------------------
|
|
# barplot for ppi2 affinity
|
|
# <10 Ang of interface
|
|
#------------------------------
|
|
# Only genes listed in geneL_ppi2 get this plot: barplot of the
# "mcsm_ppi2_outcome" column for rows within the ppi2 distance cutoff.
if (tolower(gene) %in% geneL_ppi2) {
  ppi2P <- stability_count_bp(
    plotdf = df3_ppi2,
    df_colname = "mcsm_ppi2_outcome",
    # leg_title = "mCSM-ppi2",
    # label_categories = labels_ppi2,
    # bp_plot_title = paste(common_bp_title, "PP-interface"),
    yaxis_title = "Number of nsSNPs",
    leg_position = "none",
    subtitle_text = "mCSM\nPPI2",
    bar_fill_values = c("#F8766D", "#00BFC4"),
    subtitle_colour = "black",
    sts = sts, lts = lts, ats = ats, als = als, ltis = ltis,
    geom_ls = geom_ls
  )

  # Last expression of the branch, so the plot is displayed.
  ppi2P
}
|
|
#----------------------------
|
|
# barplot for NA affinity (mCSM-NA)
|
|
# <10 Ang of NA
|
|
#------------------------------
|
|
# Only genes listed in geneL_na get this plot: barplot of the
# "mcsm_na_outcome" column for rows within the NA distance cutoff.
if (tolower(gene) %in% geneL_na) {
  nca_distP <- stability_count_bp(
    plotdf = df3_na,
    df_colname = "mcsm_na_outcome",
    # leg_title = "mCSM-NA",
    # label_categories =
    # bp_plot_title = paste(common_bp_title, "Dist to NA"),
    yaxis_title = "Number of nsSNPs",
    leg_position = "none",
    subtitle_text = "mCSM\nNA",
    bar_fill_values = c("#F8766D", "#00BFC4"),
    subtitle_colour = "black",
    sts = sts, lts = lts, ats = ats, als = als, ltis = ltis,
    geom_ls = geom_ls
  )

  # Last expression of the branch, so the plot is displayed.
  nca_distP
}
|
|
|
|
#####################################################################
|
|
# ------------------------------
|
|
# bp site site count: mCSM-lig
|
|
# < 10 Ang ligand
|
|
# ------------------------------
|
|
# Title prefix built from the cutoff and the angstrom symbol.
common_bp_title <- paste0("Sites <", DistCutOff, angstroms_symbol)

# Site/SNP count barplot on the "position" column for rows within the
# ligand distance cutoff (df3_lig); subtitle is left empty.
posC_lig <- site_snp_count_bp(
  plotdf = df3_lig,
  df_colname = "position",
  xaxis_title = "Number of nsSNPs",
  yaxis_title = "Number of Sites",
  subtitle_colour = "chocolate4",
  subtitle_text = "",
  subtitle_size = subtitle_size,
  geom_ls = geom_ls_pc,
  leg_text_size = leg_text_size,
  axis_text_size = axis_text_size,
  axis_label_size = axis_label_size
)

# Display the plot.
posC_lig
|
|
#------------------------------
|
|
# bp site site count: ppi2
|
|
# < 10 Ang interface
|
|
#------------------------------
|
|
# Only for genes in geneL_ppi2: site/SNP count barplot on the "position"
# column for rows within the ppi2 distance cutoff (df3_ppi2).
if (tolower(gene) %in% geneL_ppi2) {
  posC_ppi2 <- site_snp_count_bp(
    plotdf = df3_ppi2,
    df_colname = "position",
    xaxis_title = "Number of nsSNPs",
    yaxis_title = "Number of Sites",
    subtitle_colour = "chocolate4",
    subtitle_text = "",
    subtitle_size = subtitle_size,
    geom_ls = geom_ls_pc,
    leg_text_size = leg_text_size,
    axis_text_size = axis_text_size,
    axis_label_size = axis_label_size
  )

  # Last expression of the branch, so the plot is displayed.
  posC_ppi2
}
|
|
|
|
#------------------------------
|
|
# bp site site count: NCA dist
|
|
# < 10 Ang nca
|
|
#------------------------------
|
|
# Only for genes in geneL_na: site/SNP count barplot on the "position"
# column for rows within the NA distance cutoff (df3_na).
if (tolower(gene) %in% geneL_na) {
  posC_nca <- site_snp_count_bp(
    plotdf = df3_na,
    df_colname = "position",
    xaxis_title = "Number of nsSNPs",
    yaxis_title = "Number of Sites",
    subtitle_colour = "chocolate4",
    subtitle_text = "",
    subtitle_size = subtitle_size,
    geom_ls = geom_ls_pc,
    leg_text_size = leg_text_size,
    axis_text_size = axis_text_size,
    axis_label_size = axis_label_size
  )

  # Last expression of the branch, so the plot is displayed.
  posC_nca
}
|
|
#===============================================================
|
|
#------------------------------
|
|
# bp site site count: ALL
|
|
# all sites in df3 (no distance cutoff applied)
|
|
#------------------------------
|
|
# Site/SNP count barplot on the "position" column over the whole of df3
# (no distance filtering), subtitled "All mutations sites".
posC_all <- site_snp_count_bp(
  plotdf = df3,
  df_colname = "position",
  xaxis_title = "Number of nsSNPs",
  yaxis_title = "Number of Sites",
  subtitle_colour = "chocolate4",
  subtitle_text = "All mutations sites",
  subtitle_size = subtitle_size,
  geom_ls = geom_ls_pc,
  leg_text_size = leg_text_size,
  axis_text_size = axis_text_size,
  axis_label_size = axis_label_size
)

# Display the plot.
posC_all
|
|
##################################################################
|
|
# Barplot of the "consurf_outcome" column over the whole of df3. This is
# the one outcome plot here that places its legend at the top, and its
# fill colours come from consurf_colours (from globals).
consurfP <- stability_count_bp(
  plotdf = df3,
  df_colname = "consurf_outcome",
  # leg_title = "ConSurf",
  # label_categories = labels_consurf,
  yaxis_title = "Number of nsSNPs",
  leg_position = "top",
  subtitle_text = "ConSurf",
  bar_fill_values = consurf_colours,
  subtitle_colour = "black",
  sts = sts, lts = lts, ats = ats, als = als, ltis = ltis,
  geom_ls = geom_ls
)

# Display the plot.
consurfP
|
|
|
|
##############################################################
|
|
# Larger size set (suffix _so) passed to the stability_count_bp() calls
# that follow.
sts_so <- lts_so <- ats_so <- als_so <- ltis_so <- 10
geom_ls_so <- 2.5
|
|
#===================
|
|
# Stability
|
|
#===================
|
|
# duetP
|
|
# Barplot of the "duet_outcome" column over the whole of df3, using the
# _so size set. A legend title is passed although the legend is hidden.
duetP <- stability_count_bp(
  plotdf = df3,
  df_colname = "duet_outcome",
  leg_title = "mCSM-DUET",
  # label_categories = labels_duet,
  yaxis_title = "Number of nsSNPs",
  leg_position = "none",
  subtitle_text = "mCSM-DUET",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  subtitle_colour = "black",
  sts = sts_so, lts = lts_so, ats = ats_so, als = als_so, ltis = ltis_so,
  geom_ls = geom_ls_so
)

# Display the plot.
duetP
|
|
|
|
# foldx
|
|
# Barplot of the "foldx_outcome" column over the whole of df3, using the
# _so size set; y-axis title left blank.
# NOTE(review): no subtitle_colour is passed here (the affinity and DUET
# plots pass "black") — confirm the helper's default is what is wanted.
foldxP <- stability_count_bp(
  plotdf = df3,
  df_colname = "foldx_outcome",
  # leg_title = "FoldX",
  # label_categories = labels_foldx,
  yaxis_title = "",
  leg_position = "none",
  subtitle_text = "FoldX",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  sts = sts_so, lts = lts_so, ats = ats_so, als = als_so, ltis = ltis_so,
  geom_ls = geom_ls_so
)

# Display the plot.
foldxP
|
|
|
|
# deepddg
|
|
# Barplot of the "deepddg_outcome" column over the whole of df3, using the
# _so size set; y-axis title left blank, legend hidden.
deepddgP <- stability_count_bp(
  plotdf = df3,
  df_colname = "deepddg_outcome",
  # leg_title = "DeepDDG",
  # label_categories = labels_deepddg,
  yaxis_title = "",
  leg_position = "none",
  subtitle_text = "DeepDDG",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  sts = sts_so, lts = lts_so, ats = ats_so, als = als_so, ltis = ltis_so,
  geom_ls = geom_ls_so
)

# Display the plot.
deepddgP
|
|
|
|
# dynamut2
|
|
# Barplot of the "ddg_dynamut2_outcome" column over the whole of df3,
# using the _so size set; y-axis title left blank, legend hidden.
dynamut2P <- stability_count_bp(
  plotdf = df3,
  df_colname = "ddg_dynamut2_outcome",
  # leg_title = "Dynamut2",
  # label_categories = labels_ddg_dynamut2_outcome,
  yaxis_title = "",
  leg_position = "none",
  subtitle_text = "Dynamut2",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  sts = sts_so, lts = lts_so, ats = ats_so, als = als_so, ltis = ltis_so,
  geom_ls = geom_ls_so
)

# Display the plot.
dynamut2P
|
|
|
|
# provean
|
|
# Barplot of the "provean_outcome" column over the whole of df3, using the
# _so size set and a two-tone pink fill instead of the red/teal pair.
proveanP <- stability_count_bp(
  plotdf = df3,
  df_colname = "provean_outcome",
  # leg_title = "PROVEAN",
  # label_categories = labels_provean,
  yaxis_title = "Number of nsSNPs",
  leg_position = "none", # alternative: "top"
  subtitle_text = "PROVEAN",
  bar_fill_values = c("#D01C8B", "#F1B6DA"), # deep and light pink
  sts = sts_so, lts = lts_so, ats = ats_so, als = als_so, ltis = ltis_so,
  geom_ls = geom_ls_so
)

# Display the plot.
proveanP
|
|
|
|
# snap2
|
|
# Barplot of the "snap2_outcome" column over the whole of df3, using the
# _so size set and the same two-tone pink fill as the PROVEAN plot;
# y-axis title left blank.
snap2P <- stability_count_bp(
  plotdf = df3,
  df_colname = "snap2_outcome",
  # leg_title = "SNAP2",
  # label_categories = labels_snap2,
  yaxis_title = "",
  leg_position = "none", # alternative: "top"
  subtitle_text = "SNAP2",
  bar_fill_values = c("#D01C8B", "#F1B6DA"), # deep and light pink
  sts = sts_so, lts = lts_so, ats = ats_so, als = als_so, ltis = ltis_so,
  geom_ls = geom_ls_so
)

# Display the plot.
snap2P
|
|
#####################################################################################
|