LSHTM_analysis/scripts/plotting/plotting_thesis/preformatting.R

136 lines
4.6 KiB
R

#!/usr/bin/env Rscript
#source("~/git/LSHTM_analysis/config/alr.R")
source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/config/katg.R")
#source("~/git/LSHTM_analysis/config/gid.R")
#source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/rpob.R")
# get plottting dfs
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
#=======
# output
#=======
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
###################################################################
# FIXME: ADD distance to NA when SP replies
geneL_normal = c("pnca")
geneL_na = c("gid", "rpob")
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
# LigDist_colname # from globals used
# ppi2Dist_colname #from globals used
# naDist_colname #from globals used
common_cols = c("mutationinformation"
, "X5uhc_position"
, "X5uhc_offset"
, "position"
, "dst_mode"
, "mutation_info_labels"
, "sensitivity", dist_columns )
#===================
# stability cols
#===================
raw_cols_stability = c("duet_stability_change"
, "deepddg"
, "ddg_dynamut2"
, "ddg_foldx"
, "avg_stability")
scaled_cols_stability = c("duet_scaled"
, "deepddg_scaled"
, "ddg_dynamut2_scaled"
, "foldx_scaled"
, "foldx_scaled_signC" # needed to get avg stability
, "avg_stability_scaled")
outcome_cols_stability = c("duet_outcome"
, "deepddg_outcome"
, "ddg_dynamut2_outcome"
, "foldx_outcome"
, "avg_stability_outcome")
all_stability_cols = c(raw_cols_stability
, scaled_cols_stability
, outcome_cols_stability)
#===================
# affinity cols
#===================
raw_cols_affinity = c("ligand_affinity_change"
, "mmcsm_lig"
, "mcsm_ppi2_affinity"
, "mcsm_na_affinity"
, "avg_lig_affinity")
scaled_cols_affinity = c("affinity_scaled"
, "mmcsm_lig_scaled"
, "mcsm_ppi2_scaled"
, "mcsm_na_scaled"
, "avg_lig_affinity_scaled")
outcome_cols_affinity = c( "ligand_outcome"
, "mmcsm_lig_outcome"
, "mcsm_ppi2_outcome"
, "mcsm_na_outcome"
, "avg_lig_affinity_outcome")
all_affinity_cols = c(raw_cols_affinity
, scaled_cols_affinity
, outcome_cols_affinity)
#===================
# conservation cols
#===================
raw_cols_conservation = c("consurf_score"
, "snap2_score"
, "provean_score")
scaled_cols_conservation = c("consurf_scaled"
, "snap2_scaled"
, "provean_scaled")
outcome_cols_conservation = c("provean_outcome"
, "snap2_outcome"
, "consurf_colour_rev"
, "consurf_outcome")
all_conserv_cols = c(raw_cols_conservation
, scaled_cols_conservation
, outcome_cols_conservation)
########################################
categ_cols_to_factor = grep( "_outcome|_info", colnames(merged_df3) )
fact_cols = colnames(merged_df3)[categ_cols_to_factor]
if (any(lapply(merged_df3[, fact_cols], class) == "character")){
cat("\nChanging", length(categ_cols_to_factor), "cols to factor")
merged_df3[, fact_cols] <- lapply(merged_df3[, fact_cols], as.factor)
if (all(lapply(merged_df3[, fact_cols], class) == "factor")){
cat("\nSuccessful: cols changed to factor")
}
}else{
cat("\nRequested cols aready factors")
}
cat("\ncols changed to factor are:\n", colnames(merged_df3)[categ_cols_to_factor] )
####################################
# merged_df3: NECESSARY pre-processing
###################################
#df3 = merged_df3
plot_cols = c("mutationinformation", "mutation_info_labels", "position", "dst_mode"
, all_cols)
all_cols = c(common_cols
, all_stability_cols
, all_affinity_cols
, all_conserv_cols)