From 0d8979dfcb773d566475b68b76ddebd7616ab2ce Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Mon, 1 Aug 2022 14:09:46 +0100 Subject: [PATCH] separted cols --- .../plotting/mcsm_mean_affinity_ensemble.R | 190 ++++++------------ .../plotting/mcsm_mean_stability_ensemble.R | 95 ++++++--- 2 files changed, 129 insertions(+), 156 deletions(-) diff --git a/scripts/plotting/mcsm_mean_affinity_ensemble.R b/scripts/plotting/mcsm_mean_affinity_ensemble.R index 35b67b4..ef6efcc 100644 --- a/scripts/plotting/mcsm_mean_affinity_ensemble.R +++ b/scripts/plotting/mcsm_mean_affinity_ensemble.R @@ -60,145 +60,87 @@ all_colnames = as.data.frame(colnames(df3)) common_cols = c("mutationinformation" , "position" , "dst_mode" - #, "mutation_info_labels" + , "mutation_info_labels" , "sensitivity" , "ligand_distance") all_colnames$`colnames(df3)`[grep("scaled", all_colnames$`colnames(df3)`)] -scaled_cols = c("duet_scaled" , "duet_stability_change" - , "deepddg_scaled" , "deepddg" - , "ddg_dynamut2_scaled" , "ddg_dynamut2" - , "foldx_scaled" , "ddg_foldx" - , "affinity_scaled" , "ligand_affinity_change" - , "mmcsm_lig_scaled" , "mmcsm_lig" - , "mcsm_ppi2_scaled" , "mcsm_ppi2_affinity" - , "mcsm_na_scaled" , "mcsm_na_affinity" - #, "consurf_scaled" , "consurf_score" - #, "snap2_scaled" , "snap2_score" - #, "provean_scaled" , "provean_score" - ) all_colnames$`colnames(df3)`[grep("outcome", all_colnames$`colnames(df3)`)] -outcome_cols_aff = c("duet_outcome" - , "deepddg_outcome" - , "ddg_dynamut2_outcome" - , "foldx_outcome" - #, "ddg_foldx", "foldx_scaled" - , "ligand_outcome" - , "mmcsm_lig_outcome" - , "mcsm_ppi2_outcome" - , "mcsm_na_outcome" - # consurf outcome doesn't exist - #,"provean_outcome" - #,"snap2_outcome" - ) + +#=================== +# stability cols +#=================== +raw_cols_stability = c("duet_stability_change" + , "deepddg" + , "ddg_dynamut2" + , "ddg_foldx") + +scaled_cols_stability = c("duet_scaled" + , "deepddg_scaled" + , "ddg_dynamut2_scaled" + , "foldx_scaled") + +outcome_cols_stability = c("duet_outcome" + , "deepddg_outcome" + , "ddg_dynamut2_outcome" + , "foldx_outcome") + +#=================== +# affinity cols +#=================== +raw_cols_affinity = c("ligand_affinity_change" + , "mmcsm_lig" + , "mcsm_ppi2_affinity" + , "mcsm_na_affinity") + +scaled_cols_affinity = c("affinity_scaled" + , "mmcsm_lig_scaled" + , "mcsm_ppi2_scaled" + , "mcsm_na_scaled" ) + +outcome_cols_affinity = c( "ligand_outcome" + , "mmcsm_lig_outcome" + , "mcsm_ppi2_outcome" + , "mcsm_na_outcome") + +#=================== +# conservation cols +#=================== +# raw_cols_conservation = c("consurf_score" +# , "snap2_score" +# , "provean_score") +# +# scaled_cols_conservation = c("consurf_scaled" +# , "snap2_scaled" +# , "provean_scaled") +# +# # CANNOT strictly be used, as categories are not identical with conssurf missing altogether +# outcome_cols_conservation = c("provean_outcome" +# , "snap2_outcome" +# #consurf outcome doesn't exist +# ) + +###################################################################### cols_to_consider = colnames(df3)[colnames(df3)%in%c(common_cols + , raw_cols , scaled_cols - , outcome_cols_aff)] - -cols_to_extract = cols_to_consider[cols_to_consider%in%c(common_cols - , outcome_cols_aff)] + , outcome_cols_affinity)] +# cols_to_extract = cols_to_consider[cols_to_consider%in%c(common_cols +# , outcome_cols_affinity)] ############################################################## ##################### -# Ensemble affinity +# Ensemble affinity: affinity_cols ##################### # extract outcome cols and map numeric values to the categories # Destabilising == 0, and stabilising == 1 so rescaling can let -1 be destabilising -df3_plot = df3[, cols_to_extract] -df3_plot[, outcome_cols_aff] <- sapply(df3_plot[, outcome_cols_aff] - , function(x){ifelse(x == "Destabilising", 0, 1)}) - -#===================================== -# Stability (4 cols): average the scores -# across predictors ==> average by -# position ==> scale b/w -1 and 1 - -# column to average: ens_stability -#===================================== -cols_to_average = which(colnames(df3_plot)%in%outcome_cols_aff) - -# ensemble average across predictors -df3_plot$ens_stability = rowMeans(df3_plot[,cols_to_average]) - -head(df3_plot$position); head(df3_plot$mutationinformation) -head(df3_plot$ens_stability) -table(df3_plot$ens_stability) - -# ensemble average of predictors by position -mean_ens_stability_by_position <- df3_plot %>% - dplyr::group_by(position) %>% - dplyr::summarize(avg_ens_stability = mean(ens_stability)) - -# REscale b/w -1 and 1 -#en_stab_min = min(mean_ens_stability_by_position['avg_ens_stability']) -#en_stab_max = max(mean_ens_stability_by_position['avg_ens_stability']) - -# scale the average stability value between -1 and 1 -# mean_ens_by_position['averaged_stability3_scaled'] = lapply(mean_ens_by_position['averaged_stability3'] -# , function(x) ifelse(x < 0, x/abs(en3_min), x/en3_max)) - -mean_ens_stability_by_position['avg_ens_stability_scaled'] = lapply(mean_ens_stability_by_position['avg_ens_stability'] - , function(x) { - scales::rescale(x, to = c(-1,1) - #, from = c(en_stab_min,en_stab_max)) - , from = c(0,1)) - }) -cat(paste0('Average stability scores:\n' - , head(mean_ens_stability_by_position['avg_ens_stability']) - , '\n---------------------------------------------------------------' - , '\nAverage stability scaled scores:\n' - , head(mean_ens_stability_by_position['avg_ens_stability_scaled']))) - -# convert to a data frame -mean_ens_stability_by_position = as.data.frame(mean_ens_stability_by_position) - -#FIXME: sanity checks -# TODO: predetermine the bounds -# l_bound_ens = min(mean_ens_stability_by_position['avg_ens_stability_scaled']) -# u_bound_ens = max(mean_ens_stability_by_position['avg_ens_stability_scaled']) +# df3_plot = df3[, cols_to_extract] # -# if ( (l_bound_ens == -1) && (u_bound_ens == 1) ){ -# cat(paste0("PASS: ensemble stability scores averaged by position and then scaled" -# , "\nmin ensemble averaged stability: ", l_bound_ens -# , "\nmax ensemble averaged stability: ", u_bound_ens)) -# }else{ -# cat(paste0("FAIL: avergaed duet scores could not be scaled b/w -1 and 1" -# , "\nmin ensemble averaged stability: ", l_bound_ens -# , "\nmax ensemble averaged stability: ", u_bound_ens)) -# quit() -# } -################################################################## -#%% -affinity_outcome_colnames = c("ligand_outcome", "mmcsm_lig_outcome" - , "mcsm_ppi2_outcome" - , "mcsm_na_outcome") - -outcome_cols_affinity = colnames(df3)[colnames(df3)%in%affinity_outcome_colnames] -outcome_cols_affinity = c("ligand_outcome" - ,"mmcsm_lig_outcome") - -cols_to_consider = colnames(df3)[colnames(df3)%in%c(common_cols, scaled_cols, outcome_cols_aff, outcome_cols_affinity)] -cols_to_extract = cols_to_consider[cols_to_consider%in%c(common_cols, outcome_cols_aff)] - -foo = df3[, cols_to_consider] -df3_plot_orig = df3[, cols_to_extract] - -############################ -# Ensemble affinity: ligand -############################ -# extract ligand affinity outcome cols and map numeric values to the categories -# Destabilising == 1, and stabilising == 0 -cols_to_extract_affinity = cols_to_consider[cols_to_consider%in%c(common_cols - , outcome_cols_affinity)] - - -df3_plot_affinity = df3[, cols_to_extract_affinity] -names(df3_plot_affinity) - -df3_plot_affinity[, outcome_cols_affinity] <- sapply(df3_plot_affinity[, outcome_cols_affinity] - , function(x){ifelse(x == "Destabilising", 1, 0)}) +# df3_plot[, outcome_cols_affinity] <- sapply(df3_plot[, outcome_cols_affinity] +# , function(x){ifelse(x == "Destabilising", 0, 1)}) +df3_plot = df3[, c(common_cols, scaled_cols)] #===================================== # Affintiy (2 cols): average the scores # across predictors ==> average by @@ -206,7 +148,7 @@ df3_plot_affinity[, outcome_cols_affinity] <- sapply(df3_plot_affinity[, outcome # column to average: ens_affinity #===================================== -cols_to_average_affinity = which(colnames(df3_plot_affinity)%in%outcome_cols_affinity) +cols_to_average_affinity = which(colnames(df3_plot)%in%outcome_cols_affinity) cols_to_average_affinity # ensemble average across predictors diff --git a/scripts/plotting/mcsm_mean_stability_ensemble.R b/scripts/plotting/mcsm_mean_stability_ensemble.R index 6d63764..b6ce86f 100644 --- a/scripts/plotting/mcsm_mean_stability_ensemble.R +++ b/scripts/plotting/mcsm_mean_stability_ensemble.R @@ -53,54 +53,85 @@ all_colnames = as.data.frame(colnames(df3)) common_cols = c("mutationinformation" , "position" , "dst_mode" - #, "mutation_info_labels" + , "mutation_info_labels" , "sensitivity" , "ligand_distance") all_colnames$`colnames(df3)`[grep("scaled", all_colnames$`colnames(df3)`)] -scaled_cols = c("duet_scaled" , "duet_stability_change" - , "deepddg_scaled" , "deepddg" - , "ddg_dynamut2_scaled" , "ddg_dynamut2" - , "foldx_scaled" , "ddg_foldx" - , "affinity_scaled" , "ligand_affinity_change" - , "mmcsm_lig_scaled" , "mmcsm_lig" - , "mcsm_ppi2_scaled" , "mcsm_ppi2_affinity" - , "mcsm_na_scaled" , "mcsm_na_affinity" - #, "consurf_scaled" , "consurf_score" - #, "snap2_scaled" , "snap2_score" - #, "provean_scaled" , "provean_score" -) all_colnames$`colnames(df3)`[grep("outcome", all_colnames$`colnames(df3)`)] -outcome_cols_aff = c("duet_outcome" - , "deepddg_outcome" - , "ddg_dynamut2_outcome" - , "foldx_outcome" - #, "ddg_foldx", "foldx_scaled" - , "ligand_outcome" - , "mmcsm_lig_outcome" - , "mcsm_ppi2_outcome" - , "mcsm_na_outcome" - # consurf outcome doesn't exist - #,"provean_outcome" - #,"snap2_outcome" -) + +#=================== +# stability cols +#=================== +raw_cols_stability = c("duet_stability_change" + , "deepddg" + , "ddg_dynamut2" + , "ddg_foldx") + +scaled_cols_stability = c("duet_scaled" + , "deepddg_scaled" + , "ddg_dynamut2_scaled" + , "foldx_scaled") + +outcome_cols_stability = c("duet_outcome" + , "deepddg_outcome" + , "ddg_dynamut2_outcome" + , "foldx_outcome") + +#=================== +# affinity cols +#=================== +raw_cols_affinity = c("ligand_affinity_change" + , "mmcsm_lig" + , "mcsm_ppi2_affinity" + , "mcsm_na_affinity") + +scaled_cols_affinity = c("affinity_scaled" + , "mmcsm_lig_scaled" + , "mcsm_ppi2_scaled" + , "mcsm_na_scaled" ) + +outcome_cols_affinity = c( "ligand_outcome" + , "mmcsm_lig_outcome" + , "mcsm_ppi2_outcome" + , "mcsm_na_outcome") + +#=================== +# conservation cols +#=================== +# raw_cols_conservation = c("consurf_score" +# , "snap2_score" +# , "provean_score") +# +# scaled_cols_conservation = c("consurf_scaled" +# , "snap2_scaled" +# , "provean_scaled") +# +# # CANNOT strictly be used, as categories are not identical with conssurf missing altogether +# outcome_cols_conservation = c("provean_outcome" +# , "snap2_outcome" +# #consurf outcome doesn't exist +# ) + +########################################################### cols_to_consider = colnames(df3)[colnames(df3)%in%c(common_cols - , scaled_cols - , outcome_cols)] + , raw_cols_stability + , scaled_cols_stability + , outcome_cols_stability)] cols_to_extract = cols_to_consider[cols_to_consider%in%c(common_cols - , outcome_cols)] + , outcome_cols_stability)] ############################################################## ##################### -# Ensemble stability +# Ensemble stability: outcome_cols_stability ##################### # extract outcome cols and map numeric values to the categories # Destabilising == 0, and stabilising == 1, so rescaling can let -1 be destabilising df3_plot = df3[, cols_to_extract] # assign numeric values to outcome -df3_plot[, outcome_cols] <- sapply(df3_plot[, outcome_cols] +df3_plot[, outcome_cols_stability] <- sapply(df3_plot[, outcome_cols_stability] , function(x){ifelse(x == "Destabilising", 0, 1)}) table(df3$duet_outcome) table(df3_plot$duet_outcome) @@ -111,7 +142,7 @@ table(df3_plot$duet_outcome) # column to average: ens_stability #===================================== -cols_to_average = which(colnames(df3_plot)%in%outcome_cols) +cols_to_average = which(colnames(df3_plot)%in%outcome_cols_stability) # ensemble average across predictors df3_plot$ens_stability = rowMeans(df3_plot[,cols_to_average])