255 lines
7.8 KiB
R
255 lines
7.8 KiB
R
#!/usr/bin/env Rscript
|
|
#source("~/git/LSHTM_analysis/config/alr.R")
|
|
source("~/git/LSHTM_analysis/config/embb.R")
|
|
#source("~/git/LSHTM_analysis/config/katg.R")
|
|
#source("~/git/LSHTM_analysis/config/gid.R")
|
|
#source("~/git/LSHTM_analysis/config/pnca.R")
|
|
#source("~/git/LSHTM_analysis/config/rpob.R")
|
|
|
|
# get plotting dfs
|
|
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
|
###################################################################
|
|
# FIXME: ADD distance to NA when SP replies
|
|
# DONE: plotting_globals.R
|
|
# Names of the two distance columns; they are appended to common_cols below.
dist_columns <- c("ligand_distance", "interface_dist")

# Distance threshold (value 10); units are not stated in this file -- TODO confirm.
DistCutOff <- 10

# Columns carried alongside every group of score columns.
# NOTE(review): the "X5uhc_*" names look tied to one specific structure --
# verify they exist for the gene whose config is sourced at the top.
common_cols <- c(
  "mutationinformation",
  "X5uhc_position",
  "X5uhc_offset",
  "position",
  "dst_mode",
  "mutation_info_labels",
  "sensitivity",
  dist_columns
)
|
|
|
|
#===================
# stability cols
#===================
# Three parallel vectors, one entry per tool in the order
# DUET, DeepDDG, Dynamut2, FoldX: raw value, scaled value, outcome label.
raw_cols_stability <- c(
  "duet_stability_change",
  "deepddg",
  "ddg_dynamut2",
  "ddg_foldx"
)

scaled_cols_stability <- c(
  "duet_scaled",
  "deepddg_scaled",
  "ddg_dynamut2_scaled",
  "foldx_scaled"
)

outcome_cols_stability <- c(
  "duet_outcome",
  "deepddg_outcome",
  "ddg_dynamut2_outcome",
  "foldx_outcome"
)
|
|
|
|
#===================
# affinity cols
#===================
# Three parallel vectors (raw value, scaled value, outcome label) with one
# entry each for the ligand, mmCSM-lig, mCSM-PPI2 and mCSM-NA columns.
raw_cols_affinity <- c(
  "ligand_affinity_change",
  "mmcsm_lig",
  "mcsm_ppi2_affinity",
  "mcsm_na_affinity"
)

scaled_cols_affinity <- c(
  "affinity_scaled",
  "mmcsm_lig_scaled",
  "mcsm_ppi2_scaled",
  "mcsm_na_scaled"
)

outcome_cols_affinity <- c(
  "ligand_outcome",
  "mmcsm_lig_outcome",
  "mcsm_ppi2_outcome",
  "mcsm_na_outcome"
)
|
|
#===================
# conservation cols
#===================
# Raw and scaled score columns for ConSurf, SNAP2 and PROVEAN.
raw_cols_conservation <- c(
  "consurf_score",
  "snap2_score",
  "provean_score"
)

scaled_cols_conservation <- c(
  "consurf_scaled",
  "snap2_scaled",
  "provean_scaled"
)

# CANNOT strictly be used like the other outcome vectors: the categories are
# not identical across tools, and a ConSurf outcome column is missing altogether.
# NOTE(review): the original inline note on "consurf_colour" reads
# "doesn't exist, use this mapping" -- it is unclear which of the two
# consurf_colour* entries it refers to; confirm against the data frame.
outcome_cols_conservation <- c(
  "provean_outcome",
  "snap2_outcome",
  "consurf_colour_rev",
  "consurf_colour"
)
|
|
|
|
# Every column name of interest in one vector: the common columns first, then
# raw / scaled / outcome for stability, affinity and conservation, in that order.
all_cols <- c(
  common_cols,
  raw_cols_stability,
  scaled_cols_stability,
  outcome_cols_stability,
  raw_cols_affinity,
  scaled_cols_affinity,
  outcome_cols_affinity,
  raw_cols_conservation,
  scaled_cols_conservation,
  outcome_cols_conservation
)
|
|
|
|
|
|
#=======
# output
#=======
# Image output directory: one sub-folder per gene (lower-cased), with a
# trailing slash so file names can be pasted straight on.
# `gene` is not defined in this file -- presumably set by the sourced config; verify.
outdir_images <- paste0(
  "~/git/Writing/thesis/images/results/",
  tolower(gene),
  "/"
)
|
|
|
|
####################################
# merged_df3: NECESSARY pre-processing
####################################
# Work on a copy under a shorter name. merged_df3 is not created in this
# file -- presumably by get_plotting_dfs.R; verify.
df3 <- merged_df3
|
|
|
|
#=================
|
|
# PREFORMATTING: for consistency
|
|
#=================
|
|
# DONE: combining_dfs.R
|
|
# df3$sensitivity = ifelse(df3$dst_mode == 1, "R", "S")
|
|
# table(df3$sensitivity)
|
|
|
|
# ConSurf labels
|
|
#consurf_colOld = "consurf_colour_rev"
|
|
#consurf_colNew = "consurf_outcome"
|
|
#df3[[consurf_colNew]] = df3[[consurf_colOld]]
|
|
#df3[[consurf_colNew]] = as.factor(df3[[consurf_colNew]])
|
|
#df3[[consurf_colNew]]
|
|
# not this bit
|
|
#!!!!!!!!!!!!!1
|
|
#levels(df3$consurf_outcome) = c( "nsd", 1, 2, 3, 4, 5, 6, 7, 8, 9)
|
|
|
|
#levels(df3$consurf_outcome)
|
|
|
|
# SNAP2 labels
|
|
#snap2_colname = "snap2_outcome"
|
|
#df3[[snap2_colname]] <- str_replace(df3[[snap2_colname]], "effect", "Effect")
|
|
#df3[[snap2_colname]] <- str_replace(df3[[snap2_colname]], "neutral", "Neutral")
|
|
|
|
# for ref: not needed per se, as the function already does this and assigns labels for barplots
|
|
# labels_duet = levels(as.factor(df3$duet_outcome))
|
|
# labels_foldx = levels(as.factor(df3$foldx_outcome))
|
|
# labels_deepddg = levels(as.factor(df3$deepddg_outcome))
|
|
# labels_ddg_dynamut2_outcome = levels(as.factor(df3$ddg_dynamut2_outcome))
|
|
#
|
|
# labels_lig = levels(as.factor(df3_lig$ligand_outcome))
|
|
# labels_mmlig = levels(as.factor(df3_lig$mmcsm_lig_outcome))
|
|
# labels_ppi2 = levels(as.factor(df3_ppi2$mcsm_ppi2_outcome))
|
|
#
|
|
# labels_provean = levels(as.factor(df3$provean_outcome))
|
|
# labels_snap2 = levels(as.factor(df3$snap2_outcome))
|
|
# labels_consurf = levels(as.factor(df3$consurf_colour_rev))
|
|
# df3$consurf_colour_rev = as.factor(df3$consurf_colour_rev )
|
|
##############################################################################
|
|
#######################################
# merged_df2: NECESSARY pre-processing
#######################################
# Work on a copy under a shorter name. merged_df2 is not created in this
# file -- presumably by get_plotting_dfs.R; verify.
df2 <- merged_df2
|
|
|
|
#=================
|
|
# PREFORMATTING: for consistency
|
|
#=================
|
|
# DONE: combining_dfs.R
|
|
# df2$sensitivity = ifelse(df2$dst_mode == 1, "R", "S")
|
|
# table(df2$sensitivity)
|
|
|
|
#----------------------------------------------------
|
|
# Create dst2: fill na in dst with value of dst_mode
|
|
# for epistasis
|
|
#----------------------------------------------------
|
|
# DONE: combining_dfs.R
|
|
# df2$dst2 = ifelse(is.na(df2$dst), df2$dst_mode, df2$dst)
|
|
|
|
#----------------------------------------------------
# Reverse the sign of the FoldX raw and scaled values
# to allow averaging with the other tools.
# The sign-corrected copies go into new columns (ddg_foldxC and
# foldx_scaled_signC); the source columns are left untouched.
#----------------------------------------------------
head(df2["ddg_foldx"])
# Negate rather than abs(): abs() forces every value positive, which throws
# away the direction of the effect instead of flipping it.
df2[["ddg_foldxC"]] <- -df2[["ddg_foldx"]]
head(df2["ddg_foldxC"])

head(df2["foldx_scaled"])
df2[["foldx_scaled_signC"]] <- -df2[["foldx_scaled"]]
head(df2["foldx_scaled_signC"])
|
|
|
|
# FoldX columns to swap out of the stability column lists.
rm_foldx_cols <- c("ddg_foldx", "foldx_scaled")

# Stability column lists with the original FoldX entry dropped and the
# sign-corrected column appended at the end.
raw_cols_stab_revised <- c(
  raw_cols_stability[!(raw_cols_stability %in% rm_foldx_cols)],
  "ddg_foldxC"
)

scaled_cols_stab_revised <- c(
  scaled_cols_stability[!(scaled_cols_stability %in% rm_foldx_cols)],
  "foldx_scaled_signC"
)
|
|
|
|
######################################################
|
|
# Affinity related variables
|
|
# DONE:in plotting_globals.R
|
|
# DistCutOff = 10
|
|
# LigDist_colname # = "ligand_distance" # from globals
|
|
# ppi2Dist_colname = "interface_dist"
|
|
# naDist_colname = "TBC"
|
|
|
|
######################################################
|
|
# corr colnames
|
|
# drug
|
|
# "dst_mode"
|
|
# "ligand_distance"
|
|
# "DUET"
|
|
# "mCSM-lig"
|
|
# "FoldX"
|
|
# "DeepDDG"
|
|
# "ASA"
|
|
# "RSA"
|
|
# "KD"
|
|
# "RD"
|
|
# "Consurf"
|
|
# "SNAP2"
|
|
# "MAF"
|
|
# "Log (OR)"
|
|
# "-Log (P)"
|
|
# "Dynamut2"
|
|
# "mCSM-PPI2"
|
|
# "interface_dist"
|
|
|
|
# Correlation column set built around the four stability tools.
# Entries that were commented out in the original list and stay excluded:
#   "ASA", "RSA", "KD", "RD", "Consurf", "SNAP2",
#   "mCSM-lig", "ligand_distance", "mCSM-PPI2", "interface_dist"
# `drug` is not defined in this file -- presumably set by the sourced config; verify.
corr_ps_colnames <- c(
  "DUET", "FoldX", "DeepDDG", "Dynamut2",
  "MAF", "Log (OR)", "-Log (P)",
  "dst_mode",
  drug
)
|
|
|
|
# Correlation column set for ligand affinity (mCSM-lig plus ligand distance).
# `drug` is not defined in this file -- presumably set by the sourced config; verify.
corr_lig_colnames <- c(
  "mCSM-lig",
  "MAF", "Log (OR)", "-Log (P)",
  "ligand_distance",
  "dst_mode",
  drug
)
|
|
|
|
# Correlation column set for mCSM-PPI2 plus the interface distance.
# NOTE(review): this list has "SNAP2" where the sibling corr_* lists have
# "MAF" -- confirm that is intended.
# `drug` is not defined in this file -- presumably set by the sourced config; verify.
corr_ppi2_colnames <- c(
  "mCSM-PPI2",
  "SNAP2", "Log (OR)", "-Log (P)",
  "interface_dist",
  "dst_mode",
  drug
)
|
|
|
|
# Correlation column set for the three conservation tools.
# `drug` is not defined in this file -- presumably set by the sourced config; verify.
corr_conservation_cols <- c(
  "Consurf", "SNAP2", "PROVEAN",
  "MAF", "Log (OR)", "-Log (P)",
  "dst_mode",
  drug
)
|
|
|