sorting the ensemble and priority for ligand affinity

2022-08-01 13:36:05 +01:00 · 2022-08-01 13:36:05 +01:00 · 79c261963b
commit 79c261963b
parent f3710bfaf5
2 changed files with 66 additions and 59 deletions
--- a/scripts/plotting/mcsm_mean_affinity_ensemble.R
+++ b/scripts/plotting/mcsm_mean_affinity_ensemble.R
@ -1,20 +1,31 @@
 #source("~/git/LSHTM_analysis/config/pnca.R")
-source("~/git/LSHTM_analysis/config/alr.R")
+#source("~/git/LSHTM_analysis/config/alr.R")
-source("~/git/LSHTM_analysis/config/gid.R")
+#source("~/git/LSHTM_analysis/config/gid.R")
 #source("~/git/LSHTM_analysis/config/embb.R")
 #source("~/git/LSHTM_analysis/config/katg.R")
 source("~/git/LSHTM_analysis/config/rpob.R")
 source("/home/tanu/git/LSHTM_analysis/my_header.R")
 #########################################################
-# TASK: Generate averaged stability values 
+# TASK: Generate averaged affinity values 
-# across all stability tools
+# across all affinity tools for a given structure
-# for a given structure
+# as applicable...
 #########################################################
 #=======
 # output
 #=======
 outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene))
-outfile_mean_ens_st_aff = paste0(outdir_images, "/", tolower(gene)
+
-                                , "_mean_ens_stab_aff.csv")
+#OutFile1
-print(paste0("Output file:", outfile_mean_ens_st_aff))
+outfile_mean_aff = paste0(outdir_images, "/", tolower(gene)
                                , "_mean_affinity_all.csv")
 print(paste0("Output file:", outfile_mean_aff))
 #OutFile2
 outfile_mean_aff_priorty = paste0(outdir_images, "/", tolower(gene)
                          , "_mean_affinity_priority.csv")
 print(paste0("Output file:", outfile_mean_aff_priorty))
 #%%===============================================================
@ -53,48 +64,49 @@ common_cols  = c("mutationinformation"
                 , "sensitivity"
                 , "ligand_distance")
 # ADD the ones for mcsm_na etc
 #optional_cols = c() 
 all_colnames$`colnames(df3)`[grep("scaled", all_colnames$`colnames(df3)`)]
 #TODO: affinity_cols
 scaled_cols = c("duet_scaled"          , "duet_stability_change"
-                ,"deepddg_scaled"      , "deepddg"   
+                , "deepddg_scaled"      , "deepddg"   
-                ,"ddg_dynamut2_scaled" , "ddg_dynamut2"
+                , "ddg_dynamut2_scaled" , "ddg_dynamut2"
-                ,"foldx_scaled"        , "ddg_foldx"
+                , "foldx_scaled"        , "ddg_foldx"
                , "affinity_scaled"     , "ligand_affinity_change"
                , "mmcsm_lig_scaled"    , "mmcsm_lig"                
                , "mcsm_ppi2_scaled"   , "mcsm_ppi2_affinity"
                , "mcsm_na_scaled"     , "mcsm_na_affinity"
-                #,"consurf_scaled"      , "consurf_score"
+                #, "consurf_scaled"      , "consurf_score"
-                #,"snap2_scaled"        , "snap2_score"
+                #, "snap2_scaled"        , "snap2_score"
-                #,"provean_scaled"      , "provean_score"
+                #, "provean_scaled"      , "provean_score"
                #,"affinity_scaled"     , "ligand_affinity_change"
                #,"mmcsm_lig_scaled"    , "mmcsm_lig"
                )
 all_colnames$`colnames(df3)`[grep("outcome", all_colnames$`colnames(df3)`)]
-outcome_cols = c("duet_outcome"
+outcome_cols_aff = c("duet_outcome"
                 , "deepddg_outcome"
                 , "ddg_dynamut2_outcome"
                 , "foldx_outcome"
                 #, "ddg_foldx", "foldx_scaled"
-                 
+                 , "ligand_outcome"
                 , "mmcsm_lig_outcome"
                 , "mcsm_ppi2_outcome"
                 , "mcsm_na_outcome"
                 # consurf outcome doesn't exist
                 #,"provean_outcome"
                 #,"snap2_outcome"
                 #,"ligand_outcome"
                 #,"mmcsm_lig_outcome"
                 #, "mcsm_ppi2_outcome"
                 #, "mcsm_na_outcome"
                 )
 cols_to_consider = colnames(df3)[colnames(df3)%in%c(common_cols
                                                    , scaled_cols
                                                    , outcome_cols_aff)]
 cols_to_extract  = cols_to_consider[cols_to_consider%in%c(common_cols
                                                          , outcome_cols_aff)]
 ##############################################################
 #####################
-# Ensemble stability
+# Ensemble affinity
 #####################
 # extract outcome cols and map numeric values to the categories
 # Destabilising == 0, anything else (i.e. stabilising) == 1
 df3_plot = df3[, cols_to_extract]
-df3_plot[, outcome_cols] <- sapply(df3_plot[, outcome_cols]
+df3_plot[, outcome_cols_aff] <- sapply(df3_plot[, outcome_cols_aff]
                             , function(x){ifelse(x == "Destabilising", 0, 1)})
 #=====================================
@ -104,7 +116,7 @@ df3_plot[, outcome_cols] <- sapply(df3_plot[, outcome_cols]
 # column to average: ens_stability
 #=====================================
-cols_to_average = which(colnames(df3_plot)%in%outcome_cols)
+cols_to_average = which(colnames(df3_plot)%in%outcome_cols_aff)
 # ensemble average across predictors
 df3_plot$ens_stability = rowMeans(df3_plot[,cols_to_average])
@ -166,8 +178,8 @@ outcome_cols_affinity = colnames(df3)[colnames(df3)%in%affinity_outcome_colnames
 outcome_cols_affinity =  c("ligand_outcome"
                           ,"mmcsm_lig_outcome")
-cols_to_consider = colnames(df3)[colnames(df3)%in%c(common_cols, scaled_cols, outcome_cols, outcome_cols_affinity)]
+cols_to_consider = colnames(df3)[colnames(df3)%in%c(common_cols, scaled_cols, outcome_cols_aff, outcome_cols_affinity)]
-cols_to_extract = cols_to_consider[cols_to_consider%in%c(common_cols, outcome_cols)]
+cols_to_extract = cols_to_consider[cols_to_consider%in%c(common_cols, outcome_cols_aff)]
 foo = df3[, cols_to_consider]
 df3_plot_orig = df3[, cols_to_extract]
--- a/scripts/plotting/mcsm_mean_stability_ensemble.R
+++ b/scripts/plotting/mcsm_mean_stability_ensemble.R
@ -1,4 +1,4 @@
-source("~/git/LSHTM_analysis/config/pnca.R")
+#source("~/git/LSHTM_analysis/config/pnca.R")
 #source("~/git/LSHTM_analysis/config/alr.R")
 #source("~/git/LSHTM_analysis/config/gid.R")
 #source("~/git/LSHTM_analysis/config/embb.R")
@ -57,38 +57,33 @@ common_cols  = c("mutationinformation"
                 , "sensitivity"
                 , "ligand_distance")
 # ADD the ones for mcsm_na etc
 #optional_cols = c() 
 all_colnames$`colnames(df3)`[grep("scaled", all_colnames$`colnames(df3)`)]
 scaled_cols = c("duet_scaled"          , "duet_stability_change"
-                ,"deepddg_scaled"      , "deepddg"   
+                , "deepddg_scaled"      , "deepddg"   
-                ,"ddg_dynamut2_scaled" , "ddg_dynamut2"
+                , "ddg_dynamut2_scaled" , "ddg_dynamut2"
-                ,"foldx_scaled"        , "ddg_foldx"
+                , "foldx_scaled"        , "ddg_foldx"
                , "affinity_scaled"     , "ligand_affinity_change"
                , "mmcsm_lig_scaled"    , "mmcsm_lig"                
                , "mcsm_ppi2_scaled"   , "mcsm_ppi2_affinity"
                , "mcsm_na_scaled"     , "mcsm_na_affinity"
-                #,"consurf_scaled"      , "consurf_score"
+                #, "consurf_scaled"      , "consurf_score"
-                #,"snap2_scaled"        , "snap2_score"
+                #, "snap2_scaled"        , "snap2_score"
-                #,"provean_scaled"      , "provean_score"
+                #, "provean_scaled"      , "provean_score"
-                #,"affinity_scaled"     , "ligand_affinity_change"
+)
                #,"mmcsm_lig_scaled"    , "mmcsm_lig"
                )
 all_colnames$`colnames(df3)`[grep("outcome", all_colnames$`colnames(df3)`)]
-outcome_cols = c("duet_outcome"
+outcome_cols_aff = c("duet_outcome"
-                 , "deepddg_outcome"
+                     , "deepddg_outcome"
-                 , "ddg_dynamut2_outcome"
+                     , "ddg_dynamut2_outcome"
-                 , "foldx_outcome"
+                     , "foldx_outcome"
-                 #, "ddg_foldx", "foldx_scaled"
+                     #, "ddg_foldx", "foldx_scaled"
-                 
+                     , "ligand_outcome"
-                 # consurf outcome doesn't exist
+                     , "mmcsm_lig_outcome"
-                 #,"provean_outcome"
+                     , "mcsm_ppi2_outcome"
-                 #,"snap2_outcome"
+                     , "mcsm_na_outcome"
-                 #,"ligand_outcome"
+                     # consurf outcome doesn't exist
-                 #,"mmcsm_lig_outcome"
+                     #,"provean_outcome"
-                 #, "mcsm_ppi2_outcome"
+                     #,"snap2_outcome"
-                 #, "mcsm_na_outcome"
+)
                 )
 cols_to_consider = colnames(df3)[colnames(df3)%in%c(common_cols, scaled_cols,outcome_cols)]
 cols_to_extract  = cols_to_consider[cols_to_consider%in%c(common_cols, outcome_cols)]