added dir for embb for consistency and checks and moved others to version1
This commit is contained in:
parent
19b820e316
commit
ac72634b48
19 changed files with 1614 additions and 2 deletions
391
scripts/plotting/plotting_thesis/version1/basic_barplots.R
Normal file
391
scripts/plotting/plotting_thesis/version1/basic_barplots.R
Normal file
|
@ -0,0 +1,391 @@
|
|||
#!/usr/bin/env Rscript
|
||||
#########################################################
|
||||
# TASK: Barplots for mCSM DUET, ligand affinity, and foldX
|
||||
# basic barplots with count of mutations
|
||||
# basic barplots with frequency of count of mutations
|
||||
|
||||
# , df_colname = ""
|
||||
# , leg_title = ""
|
||||
# , ats = 25 # axis text size
|
||||
# , als = 22 # axis label size
|
||||
# , lts = 20 # legend text size
|
||||
# , ltis = 22 # label title size
|
||||
# , geom_ls = 10 # geom_label size
|
||||
# , yaxis_title = "Number of nsSNPs"
|
||||
# , bp_plot_title = ""
|
||||
# , label_categories = c("Destabilising", "Stabilising")
|
||||
# , title_colour = "chocolate4"
|
||||
# , subtitle_text = NULL
|
||||
# , sts = 20
|
||||
# , subtitle_colour = "pink"
|
||||
# #, leg_position = c(0.73,0.8) # within plot area
|
||||
# , leg_position = "top"
|
||||
# , bar_fill_values = c("#F8766D", "#00BFC4")
|
||||
#########################################################
|
||||
#=============
|
||||
# Data: Input
|
||||
#==============
|
||||
#source("~/git/LSHTM_analysis/config/pnca.R")
|
||||
#source("~/git/LSHTM_analysis/config/embb.R")
|
||||
#source("~/git/LSHTM_analysis/config/gid.R")
|
||||
|
||||
#source("~/git/LSHTM_analysis/config/alr.R")
|
||||
#source("~/git/LSHTM_analysis/config/katg.R")
|
||||
source("~/git/LSHTM_analysis/config/rpob.R")
|
||||
|
||||
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
||||
#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") sourced by above
|
||||
# sanity check
|
||||
|
||||
cat("\nSourced plotting cols as well:", length(plotting_cols))
|
||||
|
||||
####################################################
|
||||
class(merged_df3)
|
||||
merged_df3 = as.data.frame(merged_df3)
|
||||
|
||||
class(merged_df3)
|
||||
head(merged_df3$pos_count)
|
||||
|
||||
nc_pc_CHANGE = which(colnames(merged_df3)== "pos_count"); nc_pc_CHANGE
|
||||
colnames(merged_df3)[nc_pc_CHANGE] = "df2_pos_count_all"
|
||||
head(merged_df3$pos_count)
|
||||
head(merged_df3$df2_pos_count_all)
|
||||
|
||||
# DROP pos_count column
|
||||
# merged_df3$pos_count <-NULL
|
||||
merged_df3 = merged_df3[, !colnames(merged_df3)%in%c("pos_count")]
|
||||
head(merged_df3$pos_count)
|
||||
|
||||
df3 = merged_df3[, colnames(merged_df3)%in%plotting_cols]
|
||||
"nca_distance"%in%colnames(df3)
|
||||
|
||||
#=======
|
||||
# output
|
||||
#=======
|
||||
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
|
||||
cat("plots will output to:", outdir_images)
|
||||
|
||||
###########################################################
|
||||
#------------------------------
|
||||
# plot default sizes
|
||||
#------------------------------
|
||||
#=========================
|
||||
# Affinity outcome
|
||||
# check this var: outcome_cols_affinity
|
||||
# get from preformatting or put in globals
|
||||
#==========================
|
||||
DistCutOff
|
||||
LigDist_colname # = "ligand_distance" # from globals
|
||||
ppi2Dist_colname
|
||||
naDist_colname
|
||||
|
||||
###########################################################
|
||||
# get plotting data within the distance
|
||||
# Subset df3 to the rows whose distance column is below DistCutOff.
# which() keeps only rows where the comparison is TRUE: rows with a missing
# distance are dropped, whereas a bare logical index would insert an all-NA
# row for each of them.
df3_lig  = df3[which(df3[[LigDist_colname]]  < DistCutOff), ]
df3_ppi2 = df3[which(df3[[ppi2Dist_colname]] < DistCutOff), ]
df3_na   = df3[which(df3[[naDist_colname]]   < DistCutOff), ]
|
||||
common_bp_title = paste0("Sites <", DistCutOff, angstroms_symbol)
|
||||
|
||||
#------------------------------
|
||||
# barplot for ligand affinity:
|
||||
# <10 Ang of ligand
|
||||
#------------------------------
|
||||
mLigP = stability_count_bp(plotdf = df3_lig
|
||||
, df_colname = "ligand_outcome"
|
||||
#, leg_title = "mCSM-lig"
|
||||
#, bp_plot_title = paste(common_bp_title, "ligand")
|
||||
, yaxis_title = "Number of nsSNPs"
|
||||
, leg_position = "none"
|
||||
, subtitle_text = "mCSM-lig"
|
||||
, bar_fill_values = c("#F8766D", "#00BFC4")
|
||||
, subtitle_colour= "black"
|
||||
, sts = 10
|
||||
, lts = 8
|
||||
, ats = 12
|
||||
, als = 11
|
||||
, ltis = 11
|
||||
, geom_ls = 2.5)
|
||||
mLigP
|
||||
#------------------------------
|
||||
# barplot for ligand affinity:
|
||||
# <10 Ang of ligand
|
||||
# mmCSM-lig: will be the same no. of sites but the effect will be different
|
||||
#------------------------------
|
||||
mmLigP = stability_count_bp(plotdf = df3_lig
|
||||
, df_colname = "mmcsm_lig_outcome"
|
||||
#, leg_title = "mmCSM-lig"
|
||||
#, label_categories = labels_mmlig
|
||||
#, bp_plot_title = paste(common_bp_title, "ligand")
|
||||
|
||||
, yaxis_title = ""
|
||||
, leg_position = "none"
|
||||
, subtitle_text = "mmCSM-lig"
|
||||
, bar_fill_values = c("#F8766D", "#00BFC4")
|
||||
, subtitle_colour= "black"
|
||||
, sts = 10
|
||||
, lts = 8
|
||||
, ats = 12
|
||||
, als = 11
|
||||
, ltis = 11
|
||||
, geom_ls = 2.5
|
||||
)
|
||||
mmLigP
|
||||
#------------------------------
|
||||
# barplot for ppi2 affinity
|
||||
# <10 Ang of interface
|
||||
#------------------------------
|
||||
if (tolower(gene)%in%geneL_ppi2){
|
||||
ppi2P = stability_count_bp(plotdf = df3_ppi2
|
||||
, df_colname = "mcsm_ppi2_outcome"
|
||||
#, leg_title = "mCSM-ppi2"
|
||||
#, label_categories = labels_ppi2
|
||||
#, bp_plot_title = paste(common_bp_title, "PP-interface")
|
||||
|
||||
, yaxis_title = "Number of nsSNPs"
|
||||
, leg_position = "none"
|
||||
, subtitle_text = "mCSM-ppi2"
|
||||
, bar_fill_values = c("#F8766D", "#00BFC4")
|
||||
, subtitle_colour= "black"
|
||||
, sts = 10
|
||||
, lts = 8
|
||||
, ats = 12
|
||||
, als = 11
|
||||
, ltis = 11
|
||||
, geom_ls = 2.5
|
||||
)
|
||||
ppi2P
|
||||
}
|
||||
#----------------------------
|
||||
# barplot for NA (nucleic acid) affinity
|
||||
# <10 Ang of nucleic acid
|
||||
#------------------------------
|
||||
if (tolower(gene)%in%geneL_na){
|
||||
|
||||
nca_distP = stability_count_bp(plotdf = df3_na
|
||||
, df_colname = "mcsm_na_outcome"
|
||||
#, leg_title = "mCSM-NA"
|
||||
#, label_categories =
|
||||
#, bp_plot_title = paste(common_bp_title, "Dist to NA")
|
||||
|
||||
, yaxis_title = "Number of nsSNPs"
|
||||
, leg_position = "none"
|
||||
, subtitle_text = "mCSM-NA"
|
||||
, bar_fill_values = c("#F8766D", "#00BFC4")
|
||||
, subtitle_colour= "black"
|
||||
, sts = 10
|
||||
, lts = 8
|
||||
, ats = 12
|
||||
, als = 11
|
||||
, ltis = 11
|
||||
, geom_ls = 2.5
|
||||
)
|
||||
nca_distP
|
||||
}
|
||||
|
||||
#####################################################################
|
||||
# ------------------------------
|
||||
# bp site site count: mCSM-lig
|
||||
# < 10 Ang ligand
|
||||
# ------------------------------
|
||||
common_bp_title = paste0("Sites <", DistCutOff, angstroms_symbol)
|
||||
|
||||
posC_lig = site_snp_count_bp(plotdf = df3_lig
|
||||
, df_colname = "position"
|
||||
, xaxis_title = "Number of nsSNPs"
|
||||
, yaxis_title = "Number of Sites"
|
||||
, subtitle_colour = "chocolate4"
|
||||
, subtitle_text = ""
|
||||
, subtitle_size = 8
|
||||
, geom_ls = 2.6
|
||||
, leg_text_size = 10
|
||||
, axis_text_size = 10
|
||||
, axis_label_size = 10)
|
||||
|
||||
posC_lig
|
||||
#------------------------------
|
||||
# bp site site count: ppi2
|
||||
# < 10 Ang interface
|
||||
#------------------------------
|
||||
if (tolower(gene)%in%geneL_ppi2){
|
||||
|
||||
posC_ppi2 = site_snp_count_bp(plotdf = df3_ppi2
|
||||
, df_colname = "position"
|
||||
, xaxis_title = "Number of nsSNPs"
|
||||
, yaxis_title = "Number of Sites"
|
||||
, subtitle_colour = "chocolate4"
|
||||
, subtitle_text = ""
|
||||
, subtitle_size = 8
|
||||
, geom_ls = 2.6
|
||||
, leg_text_size = 10
|
||||
, axis_text_size = 10
|
||||
, axis_label_size = 10)
|
||||
posC_ppi2
|
||||
}
|
||||
|
||||
#------------------------------
|
||||
# bp site site count: NCA dist
|
||||
# < 10 Ang nca
|
||||
#------------------------------
|
||||
if (tolower(gene)%in%geneL_na){
|
||||
|
||||
posC_nca = site_snp_count_bp(plotdf = df3_na
|
||||
, df_colname = "position"
|
||||
, xaxis_title = "Number of nsSNPs"
|
||||
, yaxis_title = "Number of Sites"
|
||||
, subtitle_colour = "chocolate4"
|
||||
, subtitle_text = ""
|
||||
, subtitle_size = 8
|
||||
, geom_ls = 2.6
|
||||
, leg_text_size = 10
|
||||
, axis_text_size = 10
|
||||
, axis_label_size = 10)
|
||||
posC_nca
|
||||
}
|
||||
#===============================================================
|
||||
#------------------------------
|
||||
# bp site site count: ALL
|
||||
# all sites (no distance cutoff)
|
||||
#------------------------------
|
||||
posC_all = site_snp_count_bp(plotdf = df3
|
||||
, df_colname = "position"
|
||||
, xaxis_title = "Number of nsSNPs"
|
||||
, yaxis_title = "Number of Sites"
|
||||
, subtitle_colour = "chocolate4"
|
||||
, subtitle_text = "All mutations sites"
|
||||
, subtitle_size = 8
|
||||
, geom_ls = 2.6
|
||||
, leg_text_size = 10
|
||||
, axis_text_size = 10
|
||||
, axis_label_size = 10)
|
||||
posC_all
|
||||
##################################################################
|
||||
consurfP = stability_count_bp(plotdf = df3
|
||||
, df_colname = "consurf_outcome"
|
||||
#, leg_title = "ConSurf"
|
||||
#, label_categories = labels_consurf
|
||||
, yaxis_title = "Number of nsSNPs"
|
||||
, leg_position = "top"
|
||||
, subtitle_text = "ConSurf"
|
||||
, bar_fill_values = consurf_colours # from globals
|
||||
, subtitle_colour= "black"
|
||||
, sts = 10
|
||||
, lts = 8
|
||||
, ats = 8
|
||||
, als = 8
|
||||
, ltis = 11
|
||||
, geom_ls = 2)
|
||||
|
||||
consurfP
|
||||
|
||||
##############################################################
|
||||
#===================
|
||||
# Stability
|
||||
#===================
|
||||
# duetP
|
||||
duetP = stability_count_bp(plotdf = df3
|
||||
, df_colname = "duet_outcome"
|
||||
, leg_title = "mCSM-DUET"
|
||||
#, label_categories = labels_duet
|
||||
, yaxis_title = "Number of nsSNPs"
|
||||
, leg_position = "none"
|
||||
, subtitle_text = "mCSM-DUET"
|
||||
, bar_fill_values = c("#F8766D", "#00BFC4")
|
||||
, subtitle_colour= "black"
|
||||
, sts = 10
|
||||
, lts = 8
|
||||
, ats = 12
|
||||
, als = 11
|
||||
, ltis = 11
|
||||
, geom_ls = 2.5
|
||||
)
|
||||
duetP
|
||||
|
||||
# foldx
|
||||
foldxP = stability_count_bp(plotdf = df3
|
||||
, df_colname = "foldx_outcome"
|
||||
#, leg_title = "FoldX"
|
||||
#, label_categories = labels_foldx
|
||||
, yaxis_title = ""
|
||||
, leg_position = "none"
|
||||
, subtitle_text = "FoldX"
|
||||
, bar_fill_values = c("#F8766D", "#00BFC4")
|
||||
, sts = 10
|
||||
, lts = 8
|
||||
, ats = 12
|
||||
, als = 11
|
||||
, ltis = 11
|
||||
, geom_ls = 2.5
|
||||
)
|
||||
foldxP
|
||||
|
||||
# deepddg
|
||||
deepddgP = stability_count_bp(plotdf = df3
|
||||
, df_colname = "deepddg_outcome"
|
||||
#, leg_title = "DeepDDG"
|
||||
#, label_categories = labels_deepddg
|
||||
, yaxis_title = ""
|
||||
, leg_position = "none"
|
||||
, subtitle_text = "DeepDDG"
|
||||
, bar_fill_values = c("#F8766D", "#00BFC4")
|
||||
, sts = 10
|
||||
, lts = 8
|
||||
, ats = 12
|
||||
, als = 11
|
||||
, ltis = 11
|
||||
, geom_ls = 2.5
|
||||
)
|
||||
deepddgP
|
||||
|
||||
# dynamut2
|
||||
dynamut2P = stability_count_bp(plotdf = df3
|
||||
, df_colname = "ddg_dynamut2_outcome"
|
||||
#, leg_title = "Dynamut2"
|
||||
#, label_categories = labels_ddg_dynamut2_outcome
|
||||
, yaxis_title = ""
|
||||
, leg_position = "none"
|
||||
, subtitle_text = "Dynamut2"
|
||||
, bar_fill_values = c("#F8766D", "#00BFC4")
|
||||
, sts = 10
|
||||
, lts = 8
|
||||
, ats = 12
|
||||
, als = 11
|
||||
, ltis = 11
|
||||
, geom_ls = 2.5
|
||||
)
|
||||
dynamut2P
|
||||
|
||||
# provean
|
||||
proveanP = stability_count_bp(plotdf = df3
|
||||
, df_colname = "provean_outcome"
|
||||
#, leg_title = "PROVEAN"
|
||||
#, label_categories = labels_provean
|
||||
, yaxis_title = "Number of nsSNPs"
|
||||
, leg_position = "none" # top
|
||||
, subtitle_text = "PROVEAN"
|
||||
, bar_fill_values = c("#D01C8B", "#F1B6DA") # light pink and deep
|
||||
, sts = 10
|
||||
, lts = 8
|
||||
, ats = 12
|
||||
, als = 11
|
||||
, ltis = 11
|
||||
, geom_ls = 2.5
|
||||
)
|
||||
proveanP
|
||||
|
||||
# snap2
|
||||
snap2P = stability_count_bp(plotdf = df3
|
||||
, df_colname = "snap2_outcome"
|
||||
#, leg_title = "SNAP2"
|
||||
#, label_categories = labels_snap2
|
||||
, yaxis_title = ""
|
||||
, leg_position = "none" # top
|
||||
, subtitle_text = "SNAP2"
|
||||
, bar_fill_values = c("#D01C8B", "#F1B6DA") # light pink and deep
|
||||
, sts = 10
|
||||
, lts = 8
|
||||
, ats = 12
|
||||
, als = 11
|
||||
, ltis = 11
|
||||
, geom_ls = 2.5)
|
||||
snap2P
|
||||
#####################################################################################
|
|
@ -0,0 +1,357 @@
|
|||
#!/usr/bin/env Rscript
|
||||
#########################################################
|
||||
# TASK: Barplots
|
||||
# basic barplots with outcome
|
||||
# basic barplots with frequency of count of mutations
|
||||
#########################################################
|
||||
#=============
|
||||
# Data: Input
|
||||
#==============
|
||||
#source("~/git/LSHTM_analysis/config/pnca.R")
|
||||
#source("~/git/LSHTM_analysis/config/embb.R")
|
||||
#source("~/git/LSHTM_analysis/config/gid.R")
|
||||
|
||||
#source("~/git/LSHTM_analysis/config/alr.R")
|
||||
#source("~/git/LSHTM_analysis/config/katg.R")
|
||||
source("~/git/LSHTM_analysis/config/rpob.R")
|
||||
|
||||
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
||||
#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") sourced by above
|
||||
|
||||
cat("\nSourced plotting cols as well:", length(plotting_cols))
|
||||
|
||||
####################################################
|
||||
class(merged_df3)
|
||||
|
||||
df3 = subset(merged_df3, select = -c(pos_count))
|
||||
|
||||
#=======
|
||||
# output
|
||||
#=======
|
||||
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
|
||||
cat("plots will output to:", outdir_images)
|
||||
|
||||
###########################################################
|
||||
#------------------------------
|
||||
# plot default sizes
|
||||
#------------------------------
|
||||
#=========================
|
||||
# Affinity outcome
|
||||
# check this var: outcome_cols_affinity
|
||||
# get from preformatting or put in globals
|
||||
#==========================
|
||||
DistCutOff
|
||||
LigDist_colname # = "ligand_distance" # from globals
|
||||
ppi2Dist_colname
|
||||
naDist_colname
|
||||
|
||||
###########################################################
|
||||
# get plotting data within the distance
|
||||
# Subset df3 to the rows whose distance column is below DistCutOff.
# which() keeps only rows where the comparison is TRUE: rows with a missing
# distance are dropped, whereas a bare logical index would insert an all-NA
# row for each of them.
df3_lig  = df3[which(df3[[LigDist_colname]]  < DistCutOff), ]
df3_ppi2 = df3[which(df3[[ppi2Dist_colname]] < DistCutOff), ]
df3_na   = df3[which(df3[[naDist_colname]]   < DistCutOff), ]
|
||||
common_bp_title = paste0("Sites <", DistCutOff, angstroms_symbol)
|
||||
|
||||
#------------------------------
|
||||
# barplot for ligand affinity:
|
||||
# <10 Ang of ligand
|
||||
#------------------------------
|
||||
mLigP = stability_count_bp(plotdf = df3_lig
|
||||
, df_colname = "ligand_outcome"
|
||||
#, leg_title = "mCSM-lig"
|
||||
#, bp_plot_title = paste(common_bp_title, "ligand")
|
||||
, yaxis_title = "Number of nsSNPs"
|
||||
, leg_position = "none"
|
||||
, subtitle_text = "mCSM-lig"
|
||||
, bar_fill_values = c("#F8766D", "#00BFC4")
|
||||
, subtitle_colour= "black"
|
||||
, sts = 10
|
||||
, lts = 8
|
||||
, ats = 12
|
||||
, als = 11
|
||||
, ltis = 11
|
||||
, geom_ls = 2.5)
|
||||
mLigP
|
||||
#------------------------------
|
||||
# barplot for ligand affinity:
|
||||
# <10 Ang of ligand
|
||||
# mmCSM-lig: will be the same no. of sites but the effect will be different
|
||||
#------------------------------
|
||||
mmLigP = stability_count_bp(plotdf = df3_lig
|
||||
, df_colname = "mmcsm_lig_outcome"
|
||||
#, leg_title = "mmCSM-lig"
|
||||
#, label_categories = labels_mmlig
|
||||
#, bp_plot_title = paste(common_bp_title, "ligand")
|
||||
|
||||
, yaxis_title = ""
|
||||
, leg_position = "none"
|
||||
, subtitle_text = "mmCSM-lig"
|
||||
, bar_fill_values = c("#F8766D", "#00BFC4")
|
||||
, subtitle_colour= "black"
|
||||
, sts = 10
|
||||
, lts = 8
|
||||
, ats = 12
|
||||
, als = 11
|
||||
, ltis = 11
|
||||
, geom_ls = 2.5
|
||||
)
|
||||
mmLigP
|
||||
#------------------------------
|
||||
# barplot for ppi2 affinity
|
||||
# <10 Ang of interface
|
||||
#------------------------------
|
||||
if (tolower(gene)%in%geneL_ppi2){
|
||||
ppi2P = stability_count_bp(plotdf = df3_ppi2
|
||||
, df_colname = "mcsm_ppi2_outcome"
|
||||
#, leg_title = "mCSM-ppi2"
|
||||
#, label_categories = labels_ppi2
|
||||
#, bp_plot_title = paste(common_bp_title, "PP-interface")
|
||||
|
||||
, yaxis_title = "Number of nsSNPs"
|
||||
, leg_position = "none"
|
||||
, subtitle_text = "mCSM-ppi2"
|
||||
, bar_fill_values = c("#F8766D", "#00BFC4")
|
||||
, subtitle_colour= "black"
|
||||
, sts = 10
|
||||
, lts = 8
|
||||
, ats = 12
|
||||
, als = 11
|
||||
, ltis = 11
|
||||
, geom_ls = 2.5
|
||||
)
|
||||
ppi2P
|
||||
}
|
||||
#----------------------------
|
||||
# barplot for NA (nucleic acid) affinity
|
||||
# <10 Ang of nucleic acid
|
||||
#------------------------------
|
||||
if (tolower(gene)%in%geneL_na){
|
||||
|
||||
nca_distP = stability_count_bp(plotdf = df3_na
|
||||
, df_colname = "mcsm_na_outcome"
|
||||
#, leg_title = "mCSM-NA"
|
||||
#, label_categories =
|
||||
#, bp_plot_title = paste(common_bp_title, "Dist to NA")
|
||||
|
||||
, yaxis_title = "Number of nsSNPs"
|
||||
, leg_position = "none"
|
||||
, subtitle_text = "mCSM-NA"
|
||||
, bar_fill_values = c("#F8766D", "#00BFC4")
|
||||
, subtitle_colour= "black"
|
||||
, sts = 10
|
||||
, lts = 8
|
||||
, ats = 12
|
||||
, als = 11
|
||||
, ltis = 11
|
||||
, geom_ls = 2.5
|
||||
)
|
||||
nca_distP
|
||||
}
|
||||
|
||||
#####################################################################
|
||||
# ------------------------------
|
||||
# bp site site count: mCSM-lig
|
||||
# < 10 Ang ligand
|
||||
# ------------------------------
|
||||
common_bp_title = paste0("Sites <", DistCutOff, angstroms_symbol)
|
||||
|
||||
posC_lig = site_snp_count_bp(plotdf = df3_lig
|
||||
, df_colname = "position"
|
||||
, xaxis_title = "Number of nsSNPs"
|
||||
, yaxis_title = "Number of Sites"
|
||||
, subtitle_colour = "chocolate4"
|
||||
, subtitle_text = ""
|
||||
, subtitle_size = 8
|
||||
, geom_ls = 2.6
|
||||
, leg_text_size = 10
|
||||
, axis_text_size = 10
|
||||
, axis_label_size = 10)
|
||||
|
||||
posC_lig
|
||||
#------------------------------
|
||||
# bp site site count: ppi2
|
||||
# < 10 Ang interface
|
||||
#------------------------------
|
||||
if (tolower(gene)%in%geneL_ppi2){
|
||||
|
||||
posC_ppi2 = site_snp_count_bp(plotdf = df3_ppi2
|
||||
, df_colname = "position"
|
||||
, xaxis_title = "Number of nsSNPs"
|
||||
, yaxis_title = "Number of Sites"
|
||||
, subtitle_colour = "chocolate4"
|
||||
, subtitle_text = ""
|
||||
, subtitle_size = 8
|
||||
, geom_ls = 2.6
|
||||
, leg_text_size = 10
|
||||
, axis_text_size = 10
|
||||
, axis_label_size = 10)
|
||||
posC_ppi2
|
||||
}
|
||||
|
||||
#------------------------------
|
||||
# bp site site count: NCA dist
|
||||
# < 10 Ang nca
|
||||
#------------------------------
|
||||
if (tolower(gene)%in%geneL_na){
|
||||
|
||||
posC_nca = site_snp_count_bp(plotdf = df3_na
|
||||
, df_colname = "position"
|
||||
, xaxis_title = "Number of nsSNPs"
|
||||
, yaxis_title = "Number of Sites"
|
||||
, subtitle_colour = "chocolate4"
|
||||
, subtitle_text = ""
|
||||
, subtitle_size = 8
|
||||
, geom_ls = 2.6
|
||||
, leg_text_size = 10
|
||||
, axis_text_size = 10
|
||||
, axis_label_size = 10)
|
||||
posC_nca
|
||||
}
|
||||
#===============================================================
|
||||
#------------------------------
|
||||
# bp site site count: ALL
|
||||
# all sites (no distance cutoff)
|
||||
#------------------------------
|
||||
posC_all = site_snp_count_bp(plotdf = df3
|
||||
, df_colname = "position"
|
||||
, xaxis_title = "Number of nsSNPs"
|
||||
, yaxis_title = "Number of Sites"
|
||||
, subtitle_colour = "chocolate4"
|
||||
, subtitle_text = "All mutations sites"
|
||||
, subtitle_size = 8
|
||||
, geom_ls = 2.6
|
||||
, leg_text_size = 10
|
||||
, axis_text_size = 10
|
||||
, axis_label_size = 10)
|
||||
posC_all
|
||||
##################################################################
|
||||
consurfP = stability_count_bp(plotdf = df3
|
||||
, df_colname = "consurf_outcome"
|
||||
#, leg_title = "ConSurf"
|
||||
#, label_categories = labels_consurf
|
||||
, yaxis_title = "Number of nsSNPs"
|
||||
, leg_position = "top"
|
||||
, subtitle_text = "ConSurf"
|
||||
, bar_fill_values = consurf_colours # from globals
|
||||
, subtitle_colour= "black"
|
||||
, sts = 10
|
||||
, lts = 8
|
||||
, ats = 8
|
||||
, als = 8
|
||||
, ltis = 11
|
||||
, geom_ls = 2)
|
||||
|
||||
consurfP
|
||||
|
||||
##############################################################
|
||||
#===================
|
||||
# Stability
|
||||
#===================
|
||||
# duetP
|
||||
duetP = stability_count_bp(plotdf = df3
|
||||
, df_colname = "duet_outcome"
|
||||
, leg_title = "mCSM-DUET"
|
||||
#, label_categories = labels_duet
|
||||
, yaxis_title = "Number of nsSNPs"
|
||||
, leg_position = "none"
|
||||
, subtitle_text = "mCSM-DUET"
|
||||
, bar_fill_values = c("#F8766D", "#00BFC4")
|
||||
, subtitle_colour= "black"
|
||||
, sts = 10
|
||||
, lts = 8
|
||||
, ats = 12
|
||||
, als = 11
|
||||
, ltis = 11
|
||||
, geom_ls = 2.5
|
||||
)
|
||||
duetP
|
||||
|
||||
# foldx
|
||||
foldxP = stability_count_bp(plotdf = df3
|
||||
, df_colname = "foldx_outcome"
|
||||
#, leg_title = "FoldX"
|
||||
#, label_categories = labels_foldx
|
||||
, yaxis_title = ""
|
||||
, leg_position = "none"
|
||||
, subtitle_text = "FoldX"
|
||||
, bar_fill_values = c("#F8766D", "#00BFC4")
|
||||
, sts = 10
|
||||
, lts = 8
|
||||
, ats = 12
|
||||
, als = 11
|
||||
, ltis = 11
|
||||
, geom_ls = 2.5
|
||||
)
|
||||
foldxP
|
||||
|
||||
# deepddg
|
||||
deepddgP = stability_count_bp(plotdf = df3
|
||||
, df_colname = "deepddg_outcome"
|
||||
#, leg_title = "DeepDDG"
|
||||
#, label_categories = labels_deepddg
|
||||
, yaxis_title = ""
|
||||
, leg_position = "none"
|
||||
, subtitle_text = "DeepDDG"
|
||||
, bar_fill_values = c("#F8766D", "#00BFC4")
|
||||
, sts = 10
|
||||
, lts = 8
|
||||
, ats = 12
|
||||
, als = 11
|
||||
, ltis = 11
|
||||
, geom_ls = 2.5
|
||||
)
|
||||
deepddgP
|
||||
|
||||
# dynamut2
|
||||
dynamut2P = stability_count_bp(plotdf = df3
|
||||
, df_colname = "ddg_dynamut2_outcome"
|
||||
#, leg_title = "Dynamut2"
|
||||
#, label_categories = labels_ddg_dynamut2_outcome
|
||||
, yaxis_title = ""
|
||||
, leg_position = "none"
|
||||
, subtitle_text = "Dynamut2"
|
||||
, bar_fill_values = c("#F8766D", "#00BFC4")
|
||||
, sts = 10
|
||||
, lts = 8
|
||||
, ats = 12
|
||||
, als = 11
|
||||
, ltis = 11
|
||||
, geom_ls = 2.5
|
||||
)
|
||||
dynamut2P
|
||||
|
||||
# provean
|
||||
proveanP = stability_count_bp(plotdf = df3
|
||||
, df_colname = "provean_outcome"
|
||||
#, leg_title = "PROVEAN"
|
||||
#, label_categories = labels_provean
|
||||
, yaxis_title = "Number of nsSNPs"
|
||||
, leg_position = "none" # top
|
||||
, subtitle_text = "PROVEAN"
|
||||
, bar_fill_values = c("#D01C8B", "#F1B6DA") # light pink and deep
|
||||
, sts = 10
|
||||
, lts = 8
|
||||
, ats = 12
|
||||
, als = 11
|
||||
, ltis = 11
|
||||
, geom_ls = 2.5
|
||||
)
|
||||
proveanP
|
||||
|
||||
# snap2
|
||||
snap2P = stability_count_bp(plotdf = df3
|
||||
, df_colname = "snap2_outcome"
|
||||
#, leg_title = "SNAP2"
|
||||
#, label_categories = labels_snap2
|
||||
, yaxis_title = ""
|
||||
, leg_position = "none" # top
|
||||
, subtitle_text = "SNAP2"
|
||||
, bar_fill_values = c("#D01C8B", "#F1B6DA") # light pink and deep
|
||||
, sts = 10
|
||||
, lts = 8
|
||||
, ats = 12
|
||||
, als = 11
|
||||
, ltis = 11
|
||||
, geom_ls = 2.5)
|
||||
snap2P
|
||||
#####################################################################################
|
|
@ -0,0 +1,270 @@
|
|||
duetP
|
||||
foldxP
|
||||
deepddgP
|
||||
dynamut2P
|
||||
proveanP
|
||||
snap2P
|
||||
|
||||
mLigP
|
||||
mmLigP
|
||||
posC_lig
|
||||
ppi2P
|
||||
posC_ppi2
|
||||
sensP
|
||||
peP
|
||||
|
||||
#========================
|
||||
# Common title settings
|
||||
#=========================
|
||||
theme_georgia <- function(...) {
|
||||
theme_gray(base_family = "sans", ...) +
|
||||
theme(plot.title = element_text(face = "bold"))
|
||||
}
|
||||
title_theme <- calc_element("plot.title", theme_georgia())
|
||||
|
||||
###############################################################
|
||||
common_bp_title = paste0("Sites <", DistCutOff, angstroms_symbol)
|
||||
|
||||
# extract common legends
|
||||
# lig affinity
|
||||
common_legend_outcome = get_legend(mLigP +
|
||||
guides(color = guide_legend(nrow = 1)) +
|
||||
theme(legend.position = "top"))
|
||||
|
||||
# stability
|
||||
common_legend_outcome = get_legend(duetP +
|
||||
guides(color = guide_legend(nrow = 1)) +
|
||||
theme(legend.position = "top"))
|
||||
# conservation
|
||||
cons_common_legend_outcome = get_legend(snap2P +
|
||||
guides(color = guide_legend(nrow = 1)) +
|
||||
theme(legend.position = "top"))
|
||||
###################################################################
|
||||
#==================================
|
||||
# Stability+Conservation: COMBINE
|
||||
#==================================
|
||||
tt_size = 10
|
||||
#----------------------------
|
||||
# stability and consv title
|
||||
#----------------------------
|
||||
tt_size = 10
|
||||
tt_stab = ggdraw() +
|
||||
draw_label(
|
||||
paste0("Stability outcome"),
|
||||
fontfamily = title_theme$family,
|
||||
fontface = title_theme$face,
|
||||
#size = title_theme$size
|
||||
size = tt_size
|
||||
)
|
||||
|
||||
tt_cons = ggdraw() +
|
||||
draw_label(
|
||||
paste0("Conservation outcome"),
|
||||
fontfamily = title_theme$family,
|
||||
fontface = title_theme$face,
|
||||
size = tt_size
|
||||
)
|
||||
|
||||
#----------------------
|
||||
# Output plot
|
||||
#-----------------------
|
||||
stab_cons_CLP = paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_stab_cons_BP_CLP.png")
|
||||
|
||||
print(paste0("plot filename:", stab_cons_CLP))
|
||||
# Write the combined stability (panels A-D) and conservation (panels E-F)
# figure to stab_cons_CLP.
# The grid is passed to print() explicitly: a bare top-level expression is only
# auto-printed when run interactively or through Rscript, so under source()
# the png device would otherwise be closed without anything drawn on it.
png(stab_cons_CLP, units = "in", width = 10, height = 5, res = 300 )

print(
  cowplot::plot_grid(
    # left: stability title + legend stacked above the four stability barplots
    cowplot::plot_grid(
      cowplot::plot_grid(
        tt_stab,
        common_legend_outcome,
        nrow = 2
      ),
      cowplot::plot_grid(
        duetP,
        foldxP,
        deepddgP,
        dynamut2P,
        nrow = 1,
        labels = c("A", "B", "C", "D"),
        label_size = 12),
      nrow = 2,
      rel_heights=c(1,10)
    ),
    # empty spacer column between the two groups
    NULL,
    # right: conservation title + legend stacked above the two barplots
    cowplot::plot_grid(
      cowplot::plot_grid(
        cowplot::plot_grid(
          tt_cons,
          cons_common_legend_outcome,
          nrow = 2
        ),
        cowplot::plot_grid(
          proveanP,
          snap2P,
          nrow=1,
          labels = c("E", "F"),
          align = "hv"),
        nrow = 2,
        rel_heights = c(1, 10),
        label_size = 12),
      nrow=1
    ),
    rel_widths = c(2,0.15,1),
    nrow=1
  )
)

dev.off()
|
||||
|
||||
#################################################################
|
||||
#=======================================
|
||||
# Affinity barplots: COMBINE ALL three
|
||||
#========================================
|
||||
|
||||
ligT = paste0(common_bp_title, " ligand")
|
||||
lig_affT = ggdraw() +
|
||||
draw_label(
|
||||
ligT,
|
||||
fontfamily = title_theme$family,
|
||||
fontface = title_theme$face,
|
||||
#size = title_theme$size
|
||||
size = 8
|
||||
)
|
||||
|
||||
p1 = cowplot::plot_grid(cowplot::plot_grid(lig_affT
|
||||
, common_legend_outcome
|
||||
, nrow=2),
|
||||
cowplot::plot_grid(mLigP, mmLigP, posC_lig
|
||||
, nrow = 1
|
||||
, rel_widths = c(1,1,1.8)
|
||||
, align = "h"),
|
||||
nrow = 2,
|
||||
rel_heights = c(1,8)
|
||||
|
||||
)
|
||||
#p1
|
||||
###########################################################
|
||||
ppi2T = paste0(common_bp_title, " PP-interface")
|
||||
ppi2_affT = ggdraw() +
|
||||
draw_label(
|
||||
ppi2T,
|
||||
fontfamily = title_theme$family,
|
||||
fontface = title_theme$face,
|
||||
#size = title_theme$size
|
||||
size = 8
|
||||
)
|
||||
###########################################################
|
||||
p2 = cowplot::plot_grid(cowplot::plot_grid(ppi2_affT, common_legend_outcome, nrow=2),
|
||||
cowplot::plot_grid(ppi2P, posC_ppi2
|
||||
, nrow = 1
|
||||
, rel_widths = c(1.2,1.8)
|
||||
, align = "h"),
|
||||
nrow = 2,
|
||||
rel_heights = c(1,8)
|
||||
)
|
||||
#p2
|
||||
###########################################################
|
||||
# PE + All position count
|
||||
peT_allT = ggdraw() +
|
||||
draw_label(
|
||||
paste0("All mutation sites"),
|
||||
fontfamily = title_theme$family,
|
||||
fontface = title_theme$face,
|
||||
#size = title_theme$size
|
||||
size = 8
|
||||
)
|
||||
|
||||
p3 = cowplot::plot_grid(cowplot::plot_grid(peT_allT, nrow = 2
|
||||
, rel_widths = c(1,3),axis = "lr"),
|
||||
cowplot::plot_grid(
|
||||
peP2, posC_all,
|
||||
nrow = 2,
|
||||
rel_widths = c(1,1),
|
||||
align = "v",
|
||||
axis = "lr",
|
||||
rel_heights = c(1,8)
|
||||
),
|
||||
rel_heights = c(1,18),
|
||||
nrow = 2,axis = "lr")
|
||||
p3
|
||||
#===============
|
||||
# Final combine
|
||||
#===============
|
||||
w = 11.79
|
||||
h = 3.5
|
||||
mut_impact_CLP = paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_mut_impactCLP.png")
|
||||
|
||||
#svg(affP, width = 20, height = 5.5)
|
||||
print(paste0("plot filename:", mut_impact_CLP))
|
||||
# Write the three combined panels (p1, p2, p3) side by side to mut_impact_CLP.
# print() is explicit so the grid is also drawn when this script is run through
# source(), where top-level values are not auto-printed and the png would
# otherwise come out blank.
png(mut_impact_CLP, units = "in", width = w, height = h, res = 300 )

print(cowplot::plot_grid(p1, p2, p3
                         , nrow = 1
                         , labels = "AUTO"
                         , label_size = 12
                         , rel_widths = c(3,2,2)
                         #, rel_heights = c(1)
))

dev.off()
|
||||
##################################################
|
||||
sensP
|
||||
consurfP
|
||||
#=================
|
||||
# Combine sensitivity + ConSurf
|
||||
# or ConSurf
|
||||
#=================
|
||||
w = 3
|
||||
h = 3
|
||||
# sens_conP = paste0(outdir_images
|
||||
# ,tolower(gene)
|
||||
# ,"_sens_cons_CLP.png")
|
||||
#
|
||||
# print(paste0("plot filename:", sens_conP))
|
||||
# png(sens_conP, units = "in", width = w, height = h, res = 300 )
|
||||
#
|
||||
# cowplot::plot_grid(sensP, consurfP,
|
||||
# nrow = 2,
|
||||
# rel_heights = c(1, 1.5)
|
||||
# )
|
||||
#
|
||||
# dev.off()
|
||||
|
||||
conCLP = paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_consurf_BP.png")
|
||||
|
||||
print(paste0("plot filename:", conCLP))
|
||||
# Write the ConSurf barplot to conCLP.
# print() is explicit so the plot is also drawn when this script is run through
# source(), where top-level values are not auto-printed.
png(conCLP, units = "in", width = w, height = h, res = 300 )
print(consurfP)

dev.off()
|
||||
#================================
|
||||
# Sensitivity mutation numbers: geom_tile
|
||||
#================================
|
||||
sensCLP = paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_sensN_tile.png")
|
||||
|
||||
print(paste0("plot filename:", sensCLP))
|
||||
# Write sensP to sensCLP.
# print() is explicit so the plot is also drawn when this script is run through
# source(), where top-level values are not auto-printed.
png(sensCLP, units = "in", width = 1, height = 1, res = 300 )
print(sensP)
dev.off()
|
||||
#================================
|
||||
# Sensitivity SITE numbers: geom_tile
|
||||
#================================
|
||||
sens_siteCLP = paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_sens_siteC_tile.png")
|
||||
|
||||
print(paste0("plot filename:", sens_siteCLP))
|
||||
# Write sens_siteP to sens_siteCLP.
# print() is explicit so the plot is also drawn when this script is run through
# source(), where top-level values are not auto-printed.
png(sens_siteCLP, units = "in", width = 1, height = 1, res = 300 )
print(sens_siteP)
dev.off()
|
||||
|
||||
###########################################################
|
||||
|
347
scripts/plotting/plotting_thesis/version1/corr_plots_thesis.R
Normal file
347
scripts/plotting/plotting_thesis/version1/corr_plots_thesis.R
Normal file
|
@ -0,0 +1,347 @@
|
|||
#!/usr/bin/env Rscript
|
||||
#source("~/git/LSHTM_analysis/config/alr.R")
|
||||
source("~/git/LSHTM_analysis/config/embb.R")
|
||||
#source("~/git/LSHTM_analysis/config/katg.R")
|
||||
#source("~/git/LSHTM_analysis/config/gid.R")
|
||||
#source("~/git/LSHTM_analysis/config/pnca.R")
|
||||
#source("~/git/LSHTM_analysis/config/rpob.R")
|
||||
|
||||
# get plotting dfs
|
||||
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
||||
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
|
||||
####################################################
|
||||
#=======
|
||||
# output
|
||||
#=======
|
||||
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
|
||||
|
||||
#=======
|
||||
# Input
|
||||
#=======
|
||||
merged_df3 = as.data.frame(merged_df3)
|
||||
corr_plotdf = corr_data_extract(merged_df3
|
||||
, gene = gene
|
||||
, drug = drug
|
||||
, extract_scaled_cols = F)
|
||||
colnames(corr_plotdf)
|
||||
|
||||
# Sanity check: the freshly extracted correlation data must carry exactly the
# same column names (same count, same order) as corr_df_m3_f, which is not
# created in this script.
# identical() is used instead of all(a == b) because `==` recycles the shorter
# vector when the two name vectors differ in length.
if (identical(colnames(corr_df_m3_f), colnames(corr_plotdf))){
  cat("PASS: corr plot colnames match for dashboard")
}else{
  stop("Abort: corr plot colnames DO NOT match for dashboard")
}
|
||||
|
||||
#corr_plotdf = corr_df_m3_f #for downstream code
|
||||
|
||||
aff_dist_cols = colnames(corr_plotdf)[grep("Dist", colnames(corr_plotdf))]
|
||||
aff_dist_cols
|
||||
|
||||
|
||||
static_cols = c("Log10(MAF)"
|
||||
, "Log10(OR)"
|
||||
#, "-Log10(P)"
|
||||
)
|
||||
|
||||
#================
|
||||
# stability
|
||||
#================
|
||||
#affinity_dist_colnames# lIg DIst and ppi Di
|
||||
corr_ps_colnames = c(static_cols
|
||||
, "DUET"
|
||||
, "FoldX"
|
||||
, "DeepDDG"
|
||||
, "Dynamut2"
|
||||
, aff_dist_cols
|
||||
, "dst_mode")
|
||||
|
||||
if (all(corr_ps_colnames%in%colnames(corr_plotdf))){
|
||||
cat("PASS: all colnames exist for correlation")
|
||||
}else{
|
||||
stop("Abort: all colnames DO NOT exist for correlation")
|
||||
}
|
||||
# Stability correlation data: static columns, stability scores, distance
# columns and the dst_mode colouring column (see corr_ps_colnames).
corr_df_ps = corr_plotdf[, corr_ps_colnames]

# Mutations with a non-missing odds ratio. The column is called "Log10(OR)"
# (see static_cols); it is addressed with [[ ]] and its full name so a
# mistyped name cannot silently yield NULL and an NA count of 0.
complete_obs_ps = nrow(corr_df_ps) - sum(is.na(corr_df_ps[["Log10(OR)"]]))
cat("\nComplete muts for stability for", gene, ":", complete_obs_ps)
|
||||
|
||||
color_coln = which(colnames(corr_df_ps) == "dst_mode")
|
||||
#end = which(colnames(corr_df_ps) == drug)
|
||||
#ncol_omit = 2
|
||||
#corr_end = end-ncol_omit
|
||||
corr_end = color_coln-1
|
||||
|
||||
#------------------------
|
||||
# Output: stability corrP
|
||||
#------------------------
|
||||
corr_psP = paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_corr_stability.svg" )
|
||||
|
||||
cat("Corr plot stability with coloured dots:", corr_psP)
|
||||
svg(corr_psP, width = 15, height = 15)
|
||||
|
||||
my_corr_pairs(corr_data_all = corr_df_ps
|
||||
, corr_cols = colnames(corr_df_ps[1:corr_end])
|
||||
, corr_method = "spearman"
|
||||
, colour_categ_col = colnames(corr_df_ps[color_coln]) #"dst_mode"
|
||||
, categ_colour = c("red", "blue")
|
||||
, density_show = F
|
||||
, hist_col = "coral4"
|
||||
, dot_size = 1.6
|
||||
, ats = 1.5
|
||||
, corr_lab_size =2.5
|
||||
, corr_value_size = 1)
|
||||
|
||||
dev.off()
|
||||
#===============
|
||||
# CONSERVATION
|
||||
#==============
|
||||
corr_conservation_cols = c( static_cols
|
||||
, "ConSurf"
|
||||
, "SNAP2"
|
||||
, "PROVEAN"
|
||||
, aff_dist_cols
|
||||
, "dst_mode"
|
||||
, drug)
|
||||
|
||||
if (all(corr_conservation_cols%in%colnames(corr_plotdf))){
|
||||
cat("PASS: all colnames exist for ConSurf-correlation")
|
||||
}else{
|
||||
stop("Abort: all colnames DO NOT exist for ConSurf-correlation")
|
||||
}
|
||||
|
||||
# Conservation correlation data (see corr_conservation_cols).
corr_df_cons = corr_plotdf[, corr_conservation_cols]

# Mutations with a non-missing odds ratio. The column is called "Log10(OR)"
# (see static_cols); it is addressed with [[ ]] and its full name so a
# mistyped name cannot silently yield NULL and an NA count of 0.
complete_obs_cons = nrow(corr_df_cons) - sum(is.na(corr_df_cons[["Log10(OR)"]]))
cat("\nComplete muts for Conservation for", gene, ":", complete_obs_cons)
|
||||
|
||||
color_coln = which(colnames(corr_df_cons) == "dst_mode")
|
||||
# end = which(colnames(corr_df_cons) == drug)
|
||||
# ncol_omit = 2
|
||||
# corr_end = end-ncol_omit
|
||||
corr_end = color_coln-1
|
||||
|
||||
|
||||
#---------------------------
|
||||
# Output: Conservation corrP
|
||||
#----------------------------
|
||||
corr_consP = paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_corr_conservation.svg" )
|
||||
|
||||
cat("Corr plot conservation coloured dots:", corr_consP)
|
||||
svg(corr_consP, width = 10, height = 10)
|
||||
|
||||
my_corr_pairs(corr_data_all = corr_df_cons
|
||||
, corr_cols = colnames(corr_df_cons[1:corr_end])
|
||||
, corr_method = "spearman"
|
||||
, colour_categ_col = colnames(corr_df_cons[color_coln]) #"dst_mode"
|
||||
, categ_colour = c("red", "blue")
|
||||
, density_show = F
|
||||
, hist_col = "coral4"
|
||||
, dot_size =1.1
|
||||
, ats = 1.5
|
||||
, corr_lab_size = 1.8
|
||||
, corr_value_size = 1)
|
||||
|
||||
dev.off()
|
||||
|
||||
#####################################################
|
||||
#DistCutOff = 10
|
||||
#LigDist_colname # = "ligand_distance" # from globals
|
||||
#ppi2Dist_colname = "interface_dist"
|
||||
#naDist_colname = "TBC"
|
||||
#####################################################
|
||||
|
||||
#================
|
||||
# ligand affinity
|
||||
#================
|
||||
# Keep only mutations whose ligand distance is below DistCutOff.
# which() drops rows with a missing distance; a bare logical index would turn
# each of them into an all-NA row.
corr_df_lig = corr_plotdf[which(corr_plotdf[, "Lig-Dist"] < DistCutOff), ]

corr_lig_colnames = c(static_cols
                      , "mCSM-lig"
                      , "mmCSM-lig"
                      , "dst_mode")
                      #, drug)

if (all(corr_lig_colnames%in%colnames(corr_plotdf))){
  cat("PASS: all colnames exist for Lig-correlation")
}else{
  stop("Abort: all colnames DO NOT exist for Lig-correlation")
}

# Select the plotting columns from the distance-filtered rows. Selecting from
# corr_plotdf at this point would discard the distance filter applied above.
corr_df_lig = corr_df_lig[, corr_lig_colnames]

# Mutations with a non-missing odds ratio; the column is "Log10(OR)"
# (see static_cols).
complete_obs_lig = nrow(corr_df_lig) - sum(is.na(corr_df_lig[["Log10(OR)"]]))
cat("\nComplete muts for lig affinity for", gene, ":", complete_obs_lig)
|
||||
|
||||
color_coln = which(colnames(corr_df_lig) == "dst_mode")
|
||||
# end = which(colnames(corr_df_lig) == drug)
|
||||
# ncol_omit = 2
|
||||
# corr_end = end-ncol_omit
|
||||
corr_end = color_coln-1
|
||||
|
||||
#------------------------
|
||||
# Output: ligand corrP
|
||||
#------------------------
|
||||
corr_ligP = paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_corr_lig.svg" )
|
||||
|
||||
cat("Corr plot affinity with coloured dots:", corr_ligP)
|
||||
svg(corr_ligP, width = 10, height = 10)
|
||||
|
||||
my_corr_pairs(corr_data_all = corr_df_lig
|
||||
, corr_cols = colnames(corr_df_lig[1:corr_end])
|
||||
, corr_method = "spearman"
|
||||
, colour_categ_col = colnames(corr_df_lig[color_coln]) #"dst_mode"
|
||||
, categ_colour = c("red", "blue")
|
||||
, density_show = F
|
||||
, hist_col = "coral4"
|
||||
, dot_size = 2
|
||||
, ats = 1.5
|
||||
, corr_lab_size =3
|
||||
, corr_value_size = 1)
|
||||
dev.off()
|
||||
####################################################
|
||||
#================
|
||||
# ppi2 affinity
|
||||
#================
|
||||
|
||||
if (tolower(gene)%in%geneL_ppi2){
|
||||
|
||||
# Keep only mutations whose PPI distance is below DistCutOff.
# which() drops rows with a missing distance; a bare logical index would turn
# each of them into an all-NA row.
corr_df_ppi2 = corr_plotdf[which(corr_plotdf[, "PPI-Dist"] < DistCutOff), ]

corr_ppi2_colnames = c(static_cols
                       , "mCSM-PPI2"
                       , "dst_mode"
                       , drug)

if (all(corr_ppi2_colnames%in%colnames(corr_plotdf))){
  cat("PASS: all colnames exist for mcsm-ppi2 correlation")
}else{
  stop("Abort: all colnames DO NOT exist for mcsm-ppi2 correlation")
}

# Select the plotting columns from the distance-filtered rows. Selecting from
# corr_plotdf at this point would discard the distance filter applied above.
corr_df_ppi2 = corr_df_ppi2[, corr_ppi2_colnames]

# Mutations with a non-missing odds ratio; the column is "Log10(OR)"
# (see static_cols).
complete_obs_ppi2 = nrow(corr_df_ppi2) - sum(is.na(corr_df_ppi2[["Log10(OR)"]]))
cat("\nComplete muts for ppi2 affinity for", gene, ":", complete_obs_ppi2)
|
||||
|
||||
color_coln = which(colnames(corr_df_ppi2) == "dst_mode")
|
||||
# end = which(colnames(corr_df_ppi2) == drug)
|
||||
# ncol_omit = 2
|
||||
# corr_end = end-ncol_omit
|
||||
corr_end = color_coln-1
|
||||
|
||||
#------------------------
|
||||
# Output: ppi2 corrP
|
||||
#------------------------
|
||||
corr_ppi2P = paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_corr_ppi2.svg" )
|
||||
|
||||
cat("Corr plot ppi2 with coloured dots:", corr_ppi2P)
|
||||
svg(corr_ppi2P, width = 10, height = 10)
|
||||
|
||||
my_corr_pairs(corr_data_all = corr_df_ppi2
|
||||
, corr_cols = colnames(corr_df_ppi2[1:corr_end])
|
||||
, corr_method = "spearman"
|
||||
, colour_categ_col = colnames(corr_df_ppi2[color_coln]) #"dst_mode"
|
||||
, categ_colour = c("red", "blue")
|
||||
, density_show = F
|
||||
, hist_col = "coral4"
|
||||
, dot_size = 2
|
||||
, ats = 1.5
|
||||
, corr_lab_size = 3
|
||||
, corr_value_size = 1)
|
||||
|
||||
dev.off()
|
||||
}
|
||||
|
||||
# FIXME: ADD distance
|
||||
#==================
|
||||
# mCSM-NA affinity
|
||||
#==================
|
||||
#================
|
||||
# NA affinity
|
||||
#================
|
||||
if (tolower(gene)%in%geneL_na){
|
||||
# Keep only mutations whose nucleic-acid distance is below DistCutOff.
# The rows are taken from corr_plotdf: corr_df_na does not exist before this
# line, so it cannot be filtered from itself.
# which() drops rows with a missing distance instead of producing all-NA rows.
# NOTE(review): assumes corr_plotdf has an "NCA-Dist" column; the FIXME above
# suggests the NA distance may not be available yet -- confirm.
corr_df_na = corr_plotdf[which(corr_plotdf[, "NCA-Dist"] < DistCutOff), ]

corr_na_colnames = c(static_cols
                     , "mCSM-NA"
                     , "dst_mode"
                     , drug)

if (all(corr_na_colnames%in%colnames(corr_plotdf))){
  cat("PASS: all colnames exist for mcsm-NA-correlation")
}else{
  stop("Abort: all colnames DO NOT exist for mcsm-NA-correlation")
}

# Select the plotting columns from the distance-filtered rows. Selecting from
# corr_plotdf at this point would discard the distance filter applied above.
corr_df_na = corr_df_na[, corr_na_colnames]

# Mutations with a non-missing odds ratio; the column is "Log10(OR)"
# (see static_cols).
complete_obs_na = nrow(corr_df_na) - sum(is.na(corr_df_na[["Log10(OR)"]]))
cat("\nComplete muts for NA affinity for", gene, ":", complete_obs_na)
|
||||
|
||||
color_coln = which(colnames(corr_df_na) == "dst_mode")
|
||||
# end = which(colnames(corr_df_na) == drug)
|
||||
# ncol_omit = 2
|
||||
# corr_end = end-ncol_omit
|
||||
corr_end = color_coln-1
|
||||
|
||||
#------------------------
|
||||
# Output: mCSM-NA corrP
|
||||
#------------------------
|
||||
corr_naP = paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_corr_na.svg" )
|
||||
|
||||
cat("Corr plot mCSM-NA with coloured dots:", corr_naP)
|
||||
svg(corr_naP, width = 10, height = 10)
|
||||
|
||||
my_corr_pairs(corr_data_all = corr_df_na
|
||||
, corr_cols = colnames(corr_df_na[1:corr_end])
|
||||
, corr_method = "spearman"
|
||||
, colour_categ_col = colnames(corr_df_na[color_coln]) #"dst_mode"
|
||||
, categ_colour = c("red", "blue")
|
||||
, density_show = F
|
||||
, hist_col = "coral4"
|
||||
, dot_size = 2
|
||||
, ats = 1.5
|
||||
, corr_lab_size = 3
|
||||
, corr_value_size = 1)
|
||||
|
||||
dev.off()
|
||||
}
|
||||
####################################################
|
||||
#===============
|
||||
#ggpairs:
|
||||
#================
|
||||
#corr_df_ps$dst_mode = ifelse(corr_df_cons$dst_mode=="1", "R", "S")
|
||||
|
||||
svg('/tmp/foo.svg', width=10, height=10, )
|
||||
|
||||
corr_plotting_df = corr_df_ps
|
||||
|
||||
ggpairs(corr_plotting_df, columns = 1:(ncol(corr_plotting_df)-1),
|
||||
upper = list(continuous = wrap('cor',
|
||||
method = "spearman",
|
||||
title="ρ",
|
||||
digits=2,
|
||||
title_args=c(colour="black")
|
||||
)
|
||||
),
|
||||
lower = list(
|
||||
continuous = wrap("points", alpha = 0.7, size=0.5),
|
||||
combo = wrap("dot", alpha = 0.7, size=0.5)
|
||||
),
|
||||
aes(colour = factor(ifelse(corr_plotting_df$dst_mode==0, "S", "R")), alpha = 0.5),
|
||||
title="Stability") +
|
||||
scale_colour_manual(values = c("red", "blue")) +
|
||||
scale_fill_manual(values = c("red", "blue")) +
|
||||
theme(
|
||||
text = element_text(size=12, face="bold")
|
||||
)
|
||||
|
||||
dev.off()
|
||||
|
||||
|
||||
#
|
164
scripts/plotting/plotting_thesis/version1/dm_om_plots_layout.R
Normal file
164
scripts/plotting/plotting_thesis/version1/dm_om_plots_layout.R
Normal file
|
@ -0,0 +1,164 @@
|
|||
# source dm_om_plots.R
|
||||
#============
|
||||
# Plot labels
|
||||
#============
|
||||
tit1 = "Stability changes"
|
||||
tit2 = "Genomic measure"
|
||||
tit3 = "Distance to partners"
|
||||
tit4 = "Evolutionary Conservation"
|
||||
tit5 = "Affinity changes"
|
||||
pt_size = 30
|
||||
|
||||
theme_georgia <- function(...) {
|
||||
theme_gray(base_family = "sans", ...) +
|
||||
theme(plot.title = element_text(face = "bold"))
|
||||
}
|
||||
|
||||
|
||||
title_theme <- calc_element("plot.title", theme_georgia())
|
||||
|
||||
pt1 = ggdraw() +
|
||||
draw_label(
|
||||
tit1,
|
||||
fontfamily = title_theme$family,
|
||||
fontface = title_theme$face,
|
||||
#size = title_theme$size
|
||||
size = pt_size
|
||||
)
|
||||
|
||||
pt2 = ggdraw() +
|
||||
draw_label(
|
||||
tit2,
|
||||
fontfamily = title_theme$family,
|
||||
fontface = title_theme$face,
|
||||
size = pt_size
|
||||
)
|
||||
|
||||
pt3 = ggdraw() +
|
||||
draw_label(
|
||||
tit3,
|
||||
fontfamily = title_theme$family,
|
||||
fontface = title_theme$face,
|
||||
size = pt_size
|
||||
)
|
||||
|
||||
pt4 = ggdraw() +
|
||||
draw_label(
|
||||
tit4,
|
||||
fontfamily = title_theme$family,
|
||||
fontface = title_theme$face,
|
||||
size = pt_size
|
||||
)
|
||||
|
||||
|
||||
pt5 = ggdraw() +
|
||||
draw_label(
|
||||
tit5,
|
||||
fontfamily = title_theme$family,
|
||||
fontface = title_theme$face,
|
||||
size = pt_size
|
||||
)
|
||||
|
||||
#======================
|
||||
# Output plot function
|
||||
#======================
|
||||
# Assemble the combined DM/OM figure (panels A-M) as one cowplot grid.
#
# The section-title grobs (pt1..pt5) and the individual plots (duetP, foldxP,
# deepddgP, dynamut2P, genomicsP, distanceP_lig, distanceP_na, consurfP,
# proveanP, snap2P, mcsmligP, mcsmlig2P, mcsmnaP) are read from the calling
# environment; none of them is created here.
#
# x: unused. Kept so the signature stays the same; this script calls the
#    function without arguments.
#
# Returns the value of the final cowplot::plot_grid() call, i.e. the top panel
# stacked above the bottom panel.
OutPlot_dm_om = function(x){

  # dist b/w plot title and plot:
  # relative heights of the title (first) and of the plots below it (second)
  relH_tp = c(0.08, 0.92)

  my_label_size = 25

  #----------------
  # Top panel
  #----------------
  top_panel = cowplot::plot_grid(
    # titled group 1: pt1 above panels A-D
    cowplot::plot_grid(pt1,
                       cowplot::plot_grid(duetP, foldxP, deepddgP, dynamut2P
                                          , nrow = 1
                                          , labels = c("A", "B", "C", "D")
                                          , label_size = my_label_size)
                       , ncol = 1
                       , rel_heights = relH_tp
    ),
    NULL, # empty cell used as horizontal spacer
    # titled group 2: pt2 above panel E
    cowplot::plot_grid(pt2,
                       cowplot::plot_grid(genomicsP
                                          , nrow = 1
                                          , labels = c("E")
                                          , label_size = my_label_size)
                       , ncol = 1
                       , rel_heights = relH_tp
    ),
    NULL, # empty cell used as horizontal spacer
    # titled group 3: pt3 above panels F-G
    cowplot::plot_grid(pt3,
                       cowplot::plot_grid( #distanceP
                         distanceP_lig
                         #, distanceP_ppi2
                         , distanceP_na
                         , nrow = 1
                         , labels = c("F", "G")
                         , label_size = my_label_size)
                       , ncol = 1
                       , rel_heights = relH_tp
    ),
    nrow = 1,
    # one width per cell above, spacers included
    rel_widths = c(2/7, 0.1/7, 0.5/7, 0.1/7, 1/7)
  )

  #----------------
  # Bottom panel
  #----------------
  bottom_panel = cowplot::plot_grid(
    # titled group 4: pt4 above panels H-J
    cowplot::plot_grid(pt4,
                       cowplot::plot_grid(consurfP, proveanP, snap2P
                                          , nrow = 1
                                          , labels = c("H", "I", "J")
                                          , label_size = my_label_size)
                       , ncol = 1
                       , rel_heights = relH_tp
    ),
    NULL, # empty cell used as horizontal spacer
    # titled group 5: pt5 above panels K-M
    cowplot::plot_grid(pt5,
                       cowplot::plot_grid(mcsmligP, mcsmlig2P
                                          #, mcsmppi2P
                                          , mcsmnaP
                                          , nrow = 1
                                          , labels = c("K", "L", "M")
                                          , label_size = my_label_size)
                       , ncol = 1
                       , rel_heights = relH_tp
    ),
    NULL, # trailing spacer cell
    nrow = 1,
    rel_widths = c(3/6, 0.1/6, 3/6, 0.1/6)
  )

  #-------------------------------
  # combine: Top and Bottom panel
  #-------------------------------
  # The two panels are stacked in a single column, so their relative size is
  # expressed with rel_heights; rel_widths does not describe a one-column
  # stack. Equal weights keep both panels the same height.
  cowplot::plot_grid(top_panel, bottom_panel
                     , nrow = 2
                     , rel_heights = c(1, 1)
                     , align = "hv")
}
|
||||
|
||||
#=====================
|
||||
# OutPlot: svg and png
|
||||
#======================
|
||||
dm_om_combinedP = paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_dm_om_all.svg")
|
||||
|
||||
cat("DM OM plots with stats:", dm_om_combinedP)
|
||||
svg(dm_om_combinedP, width = 32, height = 18)
|
||||
|
||||
OutPlot_dm_om()
|
||||
dev.off()
|
||||
|
||||
|
||||
dm_om_combinedP_png = paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_dm_om_all.png")
|
||||
cat("DM OM plots with stats:", dm_om_combinedP_png)
|
||||
png(dm_om_combinedP_png, width = 32, height = 18, units = "in", res = 300)
|
||||
|
||||
OutPlot_dm_om()
|
||||
dev.off()
|
56
scripts/plotting/plotting_thesis/version1/gg_pairs.R
Normal file
56
scripts/plotting/plotting_thesis/version1/gg_pairs.R
Normal file
|
@ -0,0 +1,56 @@
|
|||
# Tweak for layout, fonts, and text sizes.
|
||||
#svg('~/tmp/foo.svg', width=10, height=10, )
|
||||
|
||||
|
||||
# Set the width/height to inches for print. 300 dpi is reasonably ok for "draft"
|
||||
# output. To raise quality while preserving sanity, increase 'res' and
|
||||
# DO NOT alter font/point/line sizes
|
||||
|
||||
|
||||
|
||||
#- [X] Black text for "Corr:" or replace with Rho symbol
|
||||
#- [X] 0/1 == R/S
|
||||
#- [X] "rho" symbol instead of "Corr:" text
|
||||
#- [X] Dot size a bit smaller
|
||||
#- [X] Plot lines slightly thinner
|
||||
#
|
||||
#
|
||||
png('~/tmp/foo.png',
|
||||
width=10,
|
||||
height=10,
|
||||
units="in",
|
||||
res=300)
|
||||
#
|
||||
#corr_plotting_df = corr_df_ps
|
||||
colnames(corr_plotdf)
|
||||
corr_plotting_df = subset(corr_plotdf, select = -c(ethambutol,`Log10(OR)`,`-Log10(P)`, ASA, RSA, KD, RD
|
||||
, FoldX
|
||||
, DeepDDG
|
||||
, Dynamut2 ))
|
||||
colnames(corr_plotting_df)
|
||||
#ggpairs(corr_plotting_df, columns = 1:(ncol(corr_plotting_df)-1),
|
||||
ggpairs(corr_plotting_df, columns = 1:(ncol(corr_plotting_df)),
|
||||
|
||||
upper = list(continuous = wrap('cor',
|
||||
method = "spearman",
|
||||
use = "pairwise.complete.obs",
|
||||
title="ρ",
|
||||
digits=2,
|
||||
title_args=c(colour="black")
|
||||
)
|
||||
),
|
||||
lower = list(
|
||||
continuous = wrap("points", alpha = 0.7, size=0.5),
|
||||
combo = wrap("dot", alpha = 0.7, size=0.5)
|
||||
),
|
||||
aes(colour = factor(ifelse(dst_mode==0, "S", "R")), alpha = 0.5),
|
||||
title="Stability") +
|
||||
|
||||
scale_colour_manual(values = c("red", "blue")) +
|
||||
scale_fill_manual(values = c("red", "blue")) +
|
||||
theme(
|
||||
text = element_text(size=12, face="bold")
|
||||
)
|
||||
|
||||
dev.off()
|
||||
#Check all plots with LSHTM_analysis/scripts/plotting/plotting_colnames.R
|
175
scripts/plotting/plotting_thesis/version1/gg_pairs_all.R
Normal file
175
scripts/plotting/plotting_thesis/version1/gg_pairs_all.R
Normal file
|
@ -0,0 +1,175 @@
|
|||
#source("~/git/LSHTM_analysis/config/embb.R")
|
||||
#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
|
||||
#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
||||
|
||||
# Build a ggpairs correlation matrix for one group of estimates.
#
# plot_df      : data frame to plot; every column except the last one is
#                shown, and a `dst_mode` column is used for colouring.
# plot_title   : title passed to ggpairs().
# tt_args_size : text size of the correlation title (the rho symbol).
# gp_args_size : text size of the per-group correlation values.
#
# Returns the ggpairs object with the manual colour and fill scales added.
my_gg_pairs=function(plot_df, plot_title
                     , tt_args_size = 2.5
                     , gp_args_size = 2.5){

  # upper triangle: Spearman correlation computed on pairwise-complete rows
  upper_panels = list(
    continuous = wrap('cor', # ggally_cor()
                      method = "spearman",
                      use = "pairwise.complete.obs",
                      title="ρ",
                      digits=2,
                      justify_labels = "centre",
                      title_args=list(size=tt_args_size, colour="black"),
                      group_args=list(size=gp_args_size)
    )
  )

  # lower triangle: small, semi-transparent points
  lower_panels = list(
    continuous = wrap("points", alpha = 0.7, size=0.125),
    combo = wrap("dot", alpha = 0.7, size=0.125)
  )

  # same two colours for outline and fill
  rs_colours = c("red", "blue")

  pairs_plot = ggpairs(plot_df,
                       # dst_mode 0 is labelled "S", anything else "R"
                       mapping = aes(colour = factor(ifelse(dst_mode==0,
                                                            "S",
                                                            "R") ),
                                     alpha = 0.5),
                       columns = 1:(ncol(plot_df)-1),
                       upper = upper_panels,
                       lower = lower_panels,
                       title=plot_title)

  pairs_plot +
    scale_colour_manual(values = rs_colours) +
    scale_fill_manual(values = rs_colours) #+
  # theme(text = element_text(size=7,
  # face="bold"))
}
|
||||
|
||||
DistCutOff = 10
|
||||
###########################################################################
|
||||
geneL_normal = c("pnca")
|
||||
geneL_na = c("gid", "rpob")
|
||||
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
||||
|
||||
merged_df3 = as.data.frame(merged_df3)
|
||||
|
||||
corr_plotdf = corr_data_extract(merged_df3
|
||||
, gene = gene
|
||||
, drug = drug
|
||||
, extract_scaled_cols = F)
|
||||
|
||||
aff_dist_cols = colnames(corr_plotdf)[grep("Dist", colnames(corr_plotdf))]
|
||||
static_cols = c("Log10(MAF)"
|
||||
, "Log10(OR)"
|
||||
)
|
||||
############################################################
|
||||
#=============================================
|
||||
# Creating masked df for affinity data
|
||||
#=============================================
|
||||
corr_affinity_df = corr_plotdf
|
||||
|
||||
#----------------------
|
||||
# Mask affinity columns
|
||||
#-----------------------
|
||||
corr_affinity_df[corr_affinity_df["Lig-Dist"]>DistCutOff,"mCSM-lig"]=0
|
||||
corr_affinity_df[corr_affinity_df["Lig-Dist"]>DistCutOff,"mmCSM-lig"]=0
|
||||
|
||||
if (tolower(gene)%in%geneL_ppi2){
|
||||
corr_affinity_df[corr_affinity_df["PPI-Dist"]>DistCutOff,"mCSM-PPI2"]=0
|
||||
}
|
||||
|
||||
# if (tolower(gene)%in%geneL_na){
|
||||
# corr_affinity_df[corr_affinity_df["NCA-Dist"]>DistCutOff,"mCSM-NA"]=0
|
||||
# }
|
||||
|
||||
# count 0
|
||||
#res <- colSums(corr_affinity_df==0)/nrow(corr_affinity_df)*100
|
||||
unmasked_vals <- nrow(corr_affinity_df) - colSums(corr_affinity_df==0)
|
||||
unmasked_vals
|
||||
|
||||
##########################################################
|
||||
#================
|
||||
# Stability
|
||||
#================
|
||||
corr_ps_colnames = c(static_cols
|
||||
, "DUET"
|
||||
, "FoldX"
|
||||
, "DeepDDG"
|
||||
, "Dynamut2"
|
||||
, aff_dist_cols
|
||||
, "dst_mode")
|
||||
|
||||
corr_df_ps = corr_plotdf[, corr_ps_colnames]
|
||||
|
||||
# Plot #1
|
||||
plot_corr_df_ps = my_gg_pairs(corr_df_ps, plot_title="Stability estimates")
|
||||
|
||||
##########################################################
|
||||
#================
|
||||
# Conservation
|
||||
#================
|
||||
corr_conservation_cols = c( static_cols
|
||||
, "ConSurf"
|
||||
, "SNAP2"
|
||||
, "PROVEAN"
|
||||
#, aff_dist_cols
|
||||
, "dst_mode"
|
||||
)
|
||||
|
||||
corr_df_cons = corr_plotdf[, corr_conservation_cols]
|
||||
|
||||
# Plot #2
|
||||
plot_corr_df_cons = my_gg_pairs(corr_df_cons, plot_title="Conservation estimates")
|
||||
|
||||
##########################################################
|
||||
#================
|
||||
# Affinity: lig, ppi and na as applicable
|
||||
#================
|
||||
#corr_df_lig = corr_plotdf[corr_plotdf["Lig-Dist"]<DistCutOff,]
|
||||
common_aff_colnames = c("mCSM-lig"
|
||||
, "mmCSM-lig")
|
||||
|
||||
# Start from the ligand affinity columns that apply to every gene, then append
# the gene-specific ones. A gene can sit in both geneL_ppi2 and geneL_na
# (rpob does), so each match ADDS to aff_colnames instead of replacing it.
# Genes in geneL_normal, or in none of the lists, keep the common columns only.
aff_colnames = common_aff_colnames

if (tolower(gene)%in%geneL_ppi2){
  aff_colnames = c(aff_colnames, "mCSM-PPI2")
}

if (tolower(gene)%in%geneL_na){
  aff_colnames = c(aff_colnames, "mCSM-NA")
}
|
||||
|
||||
# building final affinity colnames for correlation
|
||||
corr_aff_colnames = c(static_cols
|
||||
, aff_colnames
|
||||
, "dst_mode") # imp
|
||||
|
||||
corr_df_aff = corr_affinity_df[, corr_aff_colnames]
|
||||
colnames(corr_df_aff)
|
||||
|
||||
# Plot #3
|
||||
plot_corr_df_aff = my_gg_pairs(corr_df_aff, plot_title="Affinity estimates", tt_args_size = 4, gp_args_size =4)
|
||||
|
||||
#=============
|
||||
# combine
|
||||
#=============
|
||||
|
||||
#png("/home/tanu/tmp/gg_pairs_all.png", height = 6, width=11.75, unit="in",res=300)
|
||||
png(paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_CorrAB.png"), height = 6, width=11.75, unit="in",res=300)
|
||||
|
||||
cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_ps),
|
||||
ggmatrix_gtable(plot_corr_df_cons),
|
||||
# ggmatrix_gtable(plot_corr_df_aff),
|
||||
# nrow=1, ncol=3, rel_heights = 7,7,3
|
||||
nrow=1,
|
||||
#rel_heights = 1,1
|
||||
labels = "AUTO",
|
||||
label_size = 12)
|
||||
dev.off()
|
||||
|
||||
# affinity corr
|
||||
#png("/home/tanu/tmp/gg_pairs_affinity.png", height =7, width=7, unit="in",res=300)
|
||||
png(paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_CorrC.png"), height =7, width=7, unit="in",res=300)
|
||||
|
||||
cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_aff),
|
||||
labels = "C",
|
||||
label_size = 12)
|
||||
dev.off()
|
|
@ -0,0 +1,78 @@
|
|||
##############################################################
|
||||
# PE count
|
||||
##############################################################
|
||||
rects <- data.frame(x = 1:6,
|
||||
colors = c("#ffd700" #gold
|
||||
, "#f0e68c" #khaki
|
||||
, "#da70d6"# orchid
|
||||
, "#ff1493"# deeppink
|
||||
, "#00BFC4" #, "#007d85" #blue
|
||||
, "#F8766D" )# red,
|
||||
)
|
||||
rects
|
||||
|
||||
rects$text = c("-ve Lig"
|
||||
, "+ve Lig"
|
||||
, "+ve PPI2"
|
||||
, "-ve PPI2"
|
||||
, "+ve stability"
|
||||
, "-ve stability")
|
||||
|
||||
# FOR EMBB ONLY
|
||||
rects$numbers = c(38, 0, 22, 9, 108, 681)
|
||||
rects$num_labels = paste0("n=", rects$numbers)
|
||||
|
||||
rects
|
||||
|
||||
#https://stackoverflow.com/questions/47986055/create-a-rectangle-filled-with-text
|
||||
|
||||
# One tile per row of `rects`, filled with the colour stored in that row and
# labelled with its text and "n=" count; drawn with flipped axes and the x
# axis reversed.
peP = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) +
  geom_tile(width = 1, height = 1) + # unit-sized tiles
  geom_text(color = "black", size = 1.7) + # black text in the middle of each tile
  scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
  # A plot holds a single coordinate system: a coord_fixed() added before
  # coord_flip() would just be replaced (with a message), so only coord_flip()
  # is added.
  coord_flip() + scale_x_reverse() +
  # theme_void() # remove any axis markings
  theme_nothing() # remove any axis markings
|
||||
peP
|
||||
|
||||
peP2 = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) +
|
||||
geom_tile() + # make square tiles
|
||||
geom_text(color = "black", size = 1.6) + # add white text in the middle
|
||||
scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
|
||||
coord_fixed() + # make sure tiles are square
|
||||
theme_nothing() # remove any axis markings
|
||||
peP2
|
||||
|
||||
|
||||
########################################################
|
||||
# MANUAL process
|
||||
#===============================
|
||||
# Sensitivity count: Site
|
||||
#==============================
|
||||
table(df3$sensitivity)
|
||||
#--------
|
||||
# embb
|
||||
#--------
|
||||
#rsc = 54
|
||||
#ccc = 46
|
||||
#ssc = 470
|
||||
|
||||
rect_rs_siteC =data.frame(mutation_class=c("A_Resistant sites"
|
||||
, "B_Common sites"
|
||||
, "C_Sensitive sites"),
|
||||
tile_colour =c("red",
|
||||
"purple",
|
||||
"blue"),
|
||||
numbers = c(rsc, ccc, ssc),
|
||||
order = c(1, 2, 3))
|
||||
|
||||
rect_rs_siteC$labels = paste0(rect_rs_siteC$mutation_class, "\nn=", rect_rs_siteC$ numbers)
|
||||
|
||||
sens_siteP = ggplot(rect_rs_siteC, aes(mutation_class, y = 0,
|
||||
fill = tile_colour,
|
||||
label = paste0("n=", numbers))) +
|
||||
geom_tile(width = 1, height = 1) +
|
||||
geom_label(color = "black", size = 1.7,fill = "white", alpha=0.7) +
|
||||
theme_nothing()
|
||||
sens_siteP
|
251
scripts/plotting/plotting_thesis/version1/preformatting.R
Normal file
251
scripts/plotting/plotting_thesis/version1/preformatting.R
Normal file
|
@ -0,0 +1,251 @@
|
|||
#!/usr/bin/env Rscript
|
||||
#source("~/git/LSHTM_analysis/config/alr.R")
|
||||
source("~/git/LSHTM_analysis/config/embb.R")
|
||||
#source("~/git/LSHTM_analysis/config/katg.R")
|
||||
#source("~/git/LSHTM_analysis/config/gid.R")
|
||||
#source("~/git/LSHTM_analysis/config/pnca.R")
|
||||
#source("~/git/LSHTM_analysis/config/rpob.R")
|
||||
|
||||
# get plotting dfs
|
||||
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
||||
|
||||
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
|
||||
#=======
|
||||
# output
|
||||
#=======
|
||||
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
|
||||
###################################################################
|
||||
# FIXME: ADD distance to NA when SP replies
|
||||
|
||||
geneL_normal = c("pnca")
|
||||
geneL_na = c("gid", "rpob")
|
||||
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
||||
|
||||
# LigDist_colname # from globals used
|
||||
# ppi2Dist_colname #from globals used
|
||||
# naDist_colname #from globals used
|
||||
|
||||
common_cols = c("mutationinformation"
|
||||
, "X5uhc_position"
|
||||
, "X5uhc_offset"
|
||||
, "position"
|
||||
, "dst_mode"
|
||||
, "mutation_info_labels"
|
||||
, "sensitivity", dist_columns )
|
||||
|
||||
|
||||
########################################
|
||||
categ_cols_to_factor = grep( "_outcome|_info", colnames(merged_df3) )
|
||||
fact_cols = colnames(merged_df3)[categ_cols_to_factor]
|
||||
|
||||
if (any(lapply(merged_df3[, fact_cols], class) == "character")){
|
||||
cat("\nChanging", length(categ_cols_to_factor), "cols to factor")
|
||||
merged_df3[, fact_cols] <- lapply(merged_df3[, fact_cols], as.factor)
|
||||
if (all(lapply(merged_df3[, fact_cols], class) == "factor")){
|
||||
cat("\nSuccessful: cols changed to factor")
|
||||
}
|
||||
}else{
|
||||
cat("\nRequested cols aready factors")
|
||||
}
|
||||
|
||||
cat("\ncols changed to factor are:\n", colnames(merged_df3)[categ_cols_to_factor] )
|
||||
|
||||
####################################
|
||||
# merged_df3: NECESSARY pre-processing
|
||||
###################################
|
||||
#df3 = merged_df3
|
||||
# all_cols has to be assigned before plot_cols, which is built from it.
# NOTE(review): if one of the sourced scripts already defines all_cols,
# plot_cols previously picked up that earlier value -- confirm the value
# assigned here is the intended one.
all_cols = c(common_cols
             , all_stability_cols
             , all_affinity_cols
             , all_conserv_cols)

plot_cols = c("mutationinformation", "mutation_info_labels", "position", "dst_mode"
              , all_cols)
|
||||
|
||||
|
||||
# counting
|
||||
foo = merged_df3[, c("mutationinformation"
|
||||
, "wild_pos"
|
||||
, "position"
|
||||
, "sensitivity"
|
||||
, "avg_lig_affinity"
|
||||
, "avg_lig_affinity_scaled"
|
||||
, "avg_lig_affinity_outcome"
|
||||
, "ligand_distance"
|
||||
, "ligand_affinity_change"
|
||||
, "affinity_scaled"
|
||||
, "ligand_outcome"
|
||||
, "consurf_colour_rev"
|
||||
, "consurf_outcome")]
|
||||
|
||||
table(foo$consurf_outcome)
|
||||
|
||||
foo2 = foo[foo$ligand_distance<10,]
|
||||
|
||||
table(foo2$ligand_outcome)
|
||||
|
||||
#############################
|
||||
# wide plots SNP
|
||||
# DRUG
|
||||
length(aa_pos_drug); aa_pos_drug
|
||||
drug = foo[foo$position%in%aa_pos_drug,]
|
||||
drug$wild_pos
|
||||
|
||||
length(unique(drug$position)); unique(drug$position)
|
||||
table(drug$position)
|
||||
|
||||
drug$mutationinformation[drug$position==306]
|
||||
drug$mutationinformation[drug$position==303]
|
||||
|
||||
#CA
|
||||
# Mutations located at the positions listed in aa_pos_ca
length(aa_pos_ca)
aa_pos_ca

ca <- foo[foo$position %in% aa_pos_ca, ]

ca$position
length(unique(ca$position))
table(ca$position)
|
||||
|
||||
# DSL
|
||||
# Mutations located at the positions listed in aa_pos_dsl
length(aa_pos_dsl)
aa_pos_dsl

dsl <- foo[foo$position %in% aa_pos_dsl, ]

dsl$position
length(unique(dsl$position))
table(dsl$position)

# mutations observed at selected positions
dsl$mutationinformation[dsl$position == 330]
dsl$mutationinformation[dsl$position == 438]
dsl$mutationinformation[dsl$position == 439]
dsl$mutationinformation[dsl$position == 510]
|
||||
|
||||
|
||||
|
||||
# CDL
|
||||
# Mutations located at the positions listed in aa_pos_cdl
length(aa_pos_cdl)
aa_pos_cdl

cdl <- foo[foo$position %in% aa_pos_cdl, ]

length(unique(cdl$position))
cdl$position
table(cdl$position)

# mutations observed at selected positions
cdl$mutationinformation[cdl$position == 456]
cdl$mutationinformation[cdl$position == 521]
cdl$mutationinformation[cdl$position == 554]
cdl$mutationinformation[cdl$position == 568]
cdl$mutationinformation[cdl$position == 575]
cdl$mutationinformation[cdl$position == 580]
cdl$mutationinformation[cdl$position == 658]
cdl$mutationinformation[cdl$position == 665]
|
||||
|
||||
###############################################
|
||||
# OR plot
|
||||
|
||||
# Subset of merged_df3 used for the odds-ratio (OR) summary: identifiers,
# the distance columns named in affinity_dist_colnames, and the OR / Fisher
# p-value columns.
bar <- merged_df3[, c(
  "mutationinformation",
  "wild_pos",
  "position",
  "sensitivity",
  affinity_dist_colnames,
  "or_mychisq",
  "pval_fisher",
  # "pval_chisq",
  "neglog_pval_fisher",
  "log10_or_mychisq"
)]
|
||||
|
||||
# bar$p_adj_bonferroni = p.adjust(bar$pval_fisher, method = "bonferroni")
|
||||
# bar$signif_bon = bar$p_adj_bonferroni
|
||||
# bar = dplyr::mutate(bar
|
||||
# , signif_bon = case_when(signif_bon == 0.05 ~ "."
|
||||
# , signif_bon <=0.0001 ~ '****'
|
||||
# , signif_bon <=0.001 ~ '***'
|
||||
# , signif_bon <=0.01 ~ '**'
|
||||
# , signif_bon <0.05 ~ '*'
|
||||
# , TRUE ~ 'ns'))
|
||||
|
||||
# Benjamini-Hochberg (FDR) adjustment of the Fisher p-values, plus a
# star-style significance label derived from the adjusted value.
bar$p_adj_fdr <- p.adjust(bar$pval_fisher, method = "BH")

# Branches are evaluated top to bottom and the first match wins. Rows with an
# NA adjusted p-value match none of the comparisons and fall through to "ns".
# Every branch tests the FDR-adjusted value; the Bonferroni column
# (signif_bon) is not created in this script.
bar <- dplyr::mutate(
  bar,
  signif_fdr = dplyr::case_when(
    p_adj_fdr == 0.05 ~ ".",
    p_adj_fdr <= 0.0001 ~ "****",
    p_adj_fdr <= 0.001 ~ "***",
    p_adj_fdr <= 0.01 ~ "**",
    p_adj_fdr < 0.05 ~ "*",
    TRUE ~ "ns"
  )
)
|
||||
|
||||
# sort df: largest odds ratio first
bar <- bar[order(bar$or_mychisq, decreasing = TRUE), ]

# keep only the columns needed from here on (raw p-value columns are dropped)
bar <- bar[, c(
  "mutationinformation",
  "wild_pos",
  "position",
  "sensitivity",
  affinity_dist_colnames,
  "or_mychisq",
  # "pval_fisher",
  # "pval_chisq",
  # "neglog_pval_fisher",
  # "log10_or_mychisq",
  # "signif_bon",
  "p_adj_fdr",
  "signif_fdr"
)]

table(bar$sensitivity)

# OR > 1 combined with FDR significance. signif_fdr holds text labels and
# cannot be used with `&`, so the numeric adjusted p-value is tested instead
# (< 0.05 corresponds to the "*" .. "****" labels).
table(bar$or_mychisq > 1 & bar$p_adj_fdr < 0.05) # sen and res ~ OR

str(bar)

# OR < 1 vs OR > 1; na.omit() then removes every row that has an NA in any
# column, including the NA rows produced by an NA odds ratio in the index.
sen <- bar[bar$or_mychisq < 1, ]
sen <- na.omit(sen)

res <- bar[bar$or_mychisq > 1, ]
res <- na.omit(res)
|
||||
|
||||
# comp: restrict to mutations that have an odds ratio at all
bar_or <- bar[!is.na(bar$or_mychisq), ]
table(bar_or$sensitivity)

sen1 <- bar_or[bar_or$or_mychisq < 1, ] # sen and res ~OR
res1 <- bar_or[bar_or$or_mychisq > 1, ] # sen and res ~OR

# sanity check: the two subsets must account for every row of bar_or.
# NOTE(review): a row with OR exactly equal to 1 belongs to neither subset
# and will trigger the abort below -- confirm that is intended.
if (nrow(bar_or) == nrow(sen1) + nrow(res1)) {
  cat(
    "\nPASS: df with or successfully sourced",
    "\nCalculating % of muts with OR>1"
  )
} else {
  stop("Abort: df with or numbers mismatch")
}

# fraction of OR muts with OR > 1 (printed as a fraction, then as a percent)
pc_orR <- nrow(res1) / (nrow(sen1) + nrow(res1))
pc_orR
cat(
  "\nPercentage of muts with OR>1 i.e resistant:",
  pc_orR * 100
)
|
||||
|
||||
# muts with highest OR (relies on the row order bar_or already has here)
head(bar_or$mutationinformation, 10)

# sort df: OR first, then ligand distance, then interface distance.
# decreasing = TRUE applies to all three keys.
bar_or <- bar_or[order(
  bar_or$or_mychisq,
  bar_or$ligand_distance,
  bar_or$interface_dist,
  decreasing = TRUE
), ]

# flag which site list (if any) each mutated position belongs to
bar_or$drug_site <- ifelse(bar_or$position %in% aa_pos_drug, "drug", "no")
table(bar_or$drug_site)

bar_or$dsl_site <- ifelse(bar_or$position %in% aa_pos_dsl, "dsl", "no")
table(bar_or$dsl_site)

bar_or$ca_site <- ifelse(bar_or$position %in% aa_pos_ca, "ca", "no")
table(bar_or$ca_site)

bar_or$cdl_site <- ifelse(bar_or$position %in% aa_pos_cdl, "cdl", "no")
table(bar_or$cdl_site)

# head() returns at most 10 rows; indexing with 1:10 would pad the result
# with all-NA rows whenever bar_or has fewer than 10 rows.
top10_or <- head(bar_or, 10)

# are these active sites
top10_or$position[top10_or$position %in% active_aa_pos]
|
||||
|
||||
|
||||
# closest most sig: mutations with ligand_distance < 10, nearest first and,
# within equal distance, highest OR first. which() skips rows where the
# distance is NA instead of turning them into all-NA rows.
bar_or_lig <- bar_or[which(bar_or$ligand_distance < 10), ]
bar_or_lig <- bar_or_lig[order(
  bar_or_lig$ligand_distance,
  -bar_or_lig$or_mychisq
), ]
table(bar_or_lig$signif_fdr)

# same selection and ordering, using interface_dist
bar_or_ppi <- bar_or[which(bar_or$interface_dist < 10), ]
bar_or_ppi <- bar_or_ppi[order(
  bar_or_ppi$interface_dist,
  -bar_or_ppi$or_mychisq
), ]
table(bar_or_ppi$signif_fdr)
|
Loading…
Add table
Add a link
Reference in a new issue