separated cols

2022-08-01 14:09:46 +01:00 · 2022-08-01 14:09:46 +01:00 · 0d8979dfcb
commit 0d8979dfcb
parent e750ee59aa
2 changed files with 129 additions and 156 deletions
--- a/scripts/plotting/mcsm_mean_stability_ensemble.R
+++ b/scripts/plotting/mcsm_mean_stability_ensemble.R
@@ -53,54 +53,85 @@ all_colnames = as.data.frame(colnames(df3))
 common_cols  = c("mutationinformation"
                 , "position"
                 , "dst_mode"
-                 #, "mutation_info_labels"
+                 , "mutation_info_labels"
                 , "sensitivity"
                 , "ligand_distance")

 all_colnames$`colnames(df3)`[grep("scaled", all_colnames$`colnames(df3)`)]
-scaled_cols = c("duet_scaled"          , "duet_stability_change"
-                , "deepddg_scaled"      , "deepddg"   
-                , "ddg_dynamut2_scaled" , "ddg_dynamut2"
-                , "foldx_scaled"        , "ddg_foldx"
-                , "affinity_scaled"     , "ligand_affinity_change"
-                , "mmcsm_lig_scaled"    , "mmcsm_lig"                
-                , "mcsm_ppi2_scaled"   , "mcsm_ppi2_affinity"
-                , "mcsm_na_scaled"     , "mcsm_na_affinity"
-                #, "consurf_scaled"      , "consurf_score"
-                #, "snap2_scaled"        , "snap2_score"
-                #, "provean_scaled"      , "provean_score"
-)
 all_colnames$`colnames(df3)`[grep("outcome", all_colnames$`colnames(df3)`)]
-outcome_cols_aff = c("duet_outcome"
-                     , "deepddg_outcome"
-                     , "ddg_dynamut2_outcome"
-                     , "foldx_outcome"
-                     #, "ddg_foldx", "foldx_scaled"
-                     , "ligand_outcome"
-                     , "mmcsm_lig_outcome"
-                     , "mcsm_ppi2_outcome"
-                     , "mcsm_na_outcome"
-                     # consurf outcome doesn't exist
-                     #,"provean_outcome"
-                     #,"snap2_outcome"
-)
+
+#===================
+# stability cols
+#===================
+raw_cols_stability =  c("duet_stability_change"
+                        , "deepddg"
+                        , "ddg_dynamut2"
+                        , "ddg_foldx")
+
+scaled_cols_stability = c("duet_scaled"       
+                          , "deepddg_scaled"   
+                          , "ddg_dynamut2_scaled"
+                          , "foldx_scaled")
+
+outcome_cols_stability = c("duet_outcome"
+                           , "deepddg_outcome"
+                           , "ddg_dynamut2_outcome"
+                           , "foldx_outcome")
+
+#===================
+# affinity cols
+#===================
+raw_cols_affinity =  c("ligand_affinity_change"
+                       , "mmcsm_lig"
+                       , "mcsm_ppi2_affinity"
+                       , "mcsm_na_affinity")
+
+scaled_cols_affinity = c("affinity_scaled" 
+                         , "mmcsm_lig_scaled" 
+                         , "mcsm_ppi2_scaled" 
+                         , "mcsm_na_scaled" )
+
+outcome_cols_affinity  = c( "ligand_outcome"
+                            , "mmcsm_lig_outcome"
+                            , "mcsm_ppi2_outcome"
+                            , "mcsm_na_outcome")
+
+#===================
+# conservation cols
+#===================
+# raw_cols_conservation =  c("consurf_score"
+#                            , "snap2_score"
+#                            , "provean_score")
+# 
+# scaled_cols_conservation = c("consurf_scaled"
+#                              , "snap2_scaled"
+#                              , "provean_scaled")
+# 
+# # CANNOT strictly be used, as categories are not identical, with consurf missing altogether
+# outcome_cols_conservation = c("provean_outcome"
+#                               , "snap2_outcome"
+#                               #consurf outcome doesn't exist
+# )
+
+###########################################################
 cols_to_consider = colnames(df3)[colnames(df3)%in%c(common_cols
-                                                    , scaled_cols
-                                                    , outcome_cols)]
+                                                    , raw_cols_stability
+                                                    , scaled_cols_stability
+                                                    , outcome_cols_stability)]

 cols_to_extract  = cols_to_consider[cols_to_consider%in%c(common_cols
-                                                          , outcome_cols)]
+                                                          , outcome_cols_stability)]

 ##############################################################
 #####################
-# Ensemble stability
+# Ensemble stability: outcome_cols_stability
 #####################
 # extract outcome cols and map numeric values to the categories
 # Destabilising == 0, and stabilising == 1, so rescaling can let -1 be destabilising
 df3_plot = df3[, cols_to_extract]

 # assign numeric values to outcome
-df3_plot[, outcome_cols] <- sapply(df3_plot[, outcome_cols]
+df3_plot[, outcome_cols_stability] <- sapply(df3_plot[, outcome_cols_stability]
                             , function(x){ifelse(x == "Destabilising", 0, 1)})
 table(df3$duet_outcome)
 table(df3_plot$duet_outcome)
@@ -111,7 +142,7 @@ table(df3_plot$duet_outcome)

 # column to average: ens_stability
 #=====================================
-cols_to_average = which(colnames(df3_plot)%in%outcome_cols)
+cols_to_average = which(colnames(df3_plot)%in%outcome_cols_stability)

 # ensemble average across predictors
 df3_plot$ens_stability = rowMeans(df3_plot[,cols_to_average])