diff --git a/scripts/plotting/mcsm_mean_affinity_ensemble.R b/scripts/plotting/mcsm_mean_affinity_ensemble.R index bde3d5a..7416a27 100644 --- a/scripts/plotting/mcsm_mean_affinity_ensemble.R +++ b/scripts/plotting/mcsm_mean_affinity_ensemble.R @@ -1,20 +1,31 @@ #source("~/git/LSHTM_analysis/config/pnca.R") -source("~/git/LSHTM_analysis/config/alr.R") -source("~/git/LSHTM_analysis/config/gid.R") +#source("~/git/LSHTM_analysis/config/alr.R") +#source("~/git/LSHTM_analysis/config/gid.R") +#source("~/git/LSHTM_analysis/config/embb.R") +#source("~/git/LSHTM_analysis/config/katg.R") +source("~/git/LSHTM_analysis/config/rpob.R") + source("/home/tanu/git/LSHTM_analysis/my_header.R") ######################################################### -# TASK: Generate averaged stability values -# across all stability tools -# for a given structure +# TASK: Generate averaged affinity values +# across all affinity tools for a given structure +# as applicable... ######################################################### #======= # output #======= outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene)) -outfile_mean_ens_st_aff = paste0(outdir_images, "/", tolower(gene) - , "_mean_ens_stab_aff.csv") -print(paste0("Output file:", outfile_mean_ens_st_aff)) + +#OutFile1 +outfile_mean_aff = paste0(outdir_images, "/", tolower(gene) + , "_mean_affinity_all.csv") +print(paste0("Output file:", outfile_mean_aff)) + +#OutFile2 +outfile_mean_aff_priorty = paste0(outdir_images, "/", tolower(gene) + , "_mean_affinity_priority.csv") +print(paste0("Output file:", outfile_mean_aff_priorty)) #%%=============================================================== @@ -53,48 +64,49 @@ common_cols = c("mutationinformation" , "sensitivity" , "ligand_distance") -# ADD the ones for mcsm_na etc -#optional_cols = c() - all_colnames$`colnames(df3)`[grep("scaled", all_colnames$`colnames(df3)`)] -#TODO: affinity_cols scaled_cols = c("duet_scaled" , "duet_stability_change" - ,"deepddg_scaled" , 
"deepddg" - ,"ddg_dynamut2_scaled" , "ddg_dynamut2" - ,"foldx_scaled" , "ddg_foldx" + , "deepddg_scaled" , "deepddg" + , "ddg_dynamut2_scaled" , "ddg_dynamut2" + , "foldx_scaled" , "ddg_foldx" + , "affinity_scaled" , "ligand_affinity_change" + , "mmcsm_lig_scaled" , "mmcsm_lig" , "mcsm_ppi2_scaled" , "mcsm_ppi2_affinity" , "mcsm_na_scaled" , "mcsm_na_affinity" - #,"consurf_scaled" , "consurf_score" - #,"snap2_scaled" , "snap2_score" - #,"provean_scaled" , "provean_score" - #,"affinity_scaled" , "ligand_affinity_change" - #,"mmcsm_lig_scaled" , "mmcsm_lig" + #, "consurf_scaled" , "consurf_score" + #, "snap2_scaled" , "snap2_score" + #, "provean_scaled" , "provean_score" ) all_colnames$`colnames(df3)`[grep("outcome", all_colnames$`colnames(df3)`)] -outcome_cols = c("duet_outcome" +outcome_cols_aff = c("duet_outcome" , "deepddg_outcome" , "ddg_dynamut2_outcome" , "foldx_outcome" #, "ddg_foldx", "foldx_scaled" - + , "ligand_outcome" + , "mmcsm_lig_outcome" + , "mcsm_ppi2_outcome" + , "mcsm_na_outcome" # consurf outcome doesn't exist #,"provean_outcome" #,"snap2_outcome" - #,"ligand_outcome" - #,"mmcsm_lig_outcome" - #, "mcsm_ppi2_outcome" - #, "mcsm_na_outcome" ) +cols_to_consider = colnames(df3)[colnames(df3)%in%c(common_cols + , scaled_cols + , outcome_cols_aff)] + +cols_to_extract = cols_to_consider[cols_to_consider%in%c(common_cols + , outcome_cols_aff)] ############################################################## ##################### -# Ensemble stability +# Ensemble affinity ##################### # extract outcome cols and map numeric values to the categories # Destabilising == 1, and stabilising == 0 df3_plot = df3[, cols_to_extract] -df3_plot[, outcome_cols] <- sapply(df3_plot[, outcome_cols] +df3_plot[, outcome_cols_aff] <- sapply(df3_plot[, outcome_cols_aff] , function(x){ifelse(x == "Destabilising", 0, 1)}) #===================================== @@ -104,7 +116,7 @@ df3_plot[, outcome_cols] <- sapply(df3_plot[, outcome_cols] # column to average: 
ens_stability #===================================== -cols_to_average = which(colnames(df3_plot)%in%outcome_cols) +cols_to_average = which(colnames(df3_plot)%in%outcome_cols_aff) # ensemble average across predictors df3_plot$ens_stability = rowMeans(df3_plot[,cols_to_average]) @@ -166,8 +178,8 @@ outcome_cols_affinity = colnames(df3)[colnames(df3)%in%affinity_outcome_colnames outcome_cols_affinity = c("ligand_outcome" ,"mmcsm_lig_outcome") -cols_to_consider = colnames(df3)[colnames(df3)%in%c(common_cols, scaled_cols, outcome_cols, outcome_cols_affinity)] -cols_to_extract = cols_to_consider[cols_to_consider%in%c(common_cols, outcome_cols)] +cols_to_consider = colnames(df3)[colnames(df3)%in%c(common_cols, scaled_cols, outcome_cols_aff, outcome_cols_affinity)] +cols_to_extract = cols_to_consider[cols_to_consider%in%c(common_cols, outcome_cols_aff)] foo = df3[, cols_to_consider] df3_plot_orig = df3[, cols_to_extract] diff --git a/scripts/plotting/mcsm_mean_stability_ensemble.R b/scripts/plotting/mcsm_mean_stability_ensemble.R index 2f5abb2..baba72d 100644 --- a/scripts/plotting/mcsm_mean_stability_ensemble.R +++ b/scripts/plotting/mcsm_mean_stability_ensemble.R @@ -1,4 +1,4 @@ -source("~/git/LSHTM_analysis/config/pnca.R") +#source("~/git/LSHTM_analysis/config/pnca.R") #source("~/git/LSHTM_analysis/config/alr.R") #source("~/git/LSHTM_analysis/config/gid.R") #source("~/git/LSHTM_analysis/config/embb.R") @@ -57,38 +57,33 @@ common_cols = c("mutationinformation" , "sensitivity" , "ligand_distance") -# ADD the ones for mcsm_na etc -#optional_cols = c() - all_colnames$`colnames(df3)`[grep("scaled", all_colnames$`colnames(df3)`)] scaled_cols = c("duet_scaled" , "duet_stability_change" - ,"deepddg_scaled" , "deepddg" - ,"ddg_dynamut2_scaled" , "ddg_dynamut2" - ,"foldx_scaled" , "ddg_foldx" + , "deepddg_scaled" , "deepddg" + , "ddg_dynamut2_scaled" , "ddg_dynamut2" + , "foldx_scaled" , "ddg_foldx" + , "affinity_scaled" , "ligand_affinity_change" + , "mmcsm_lig_scaled" , 
"mmcsm_lig" , "mcsm_ppi2_scaled" , "mcsm_ppi2_affinity" , "mcsm_na_scaled" , "mcsm_na_affinity" - #,"consurf_scaled" , "consurf_score" - #,"snap2_scaled" , "snap2_score" - #,"provean_scaled" , "provean_score" - #,"affinity_scaled" , "ligand_affinity_change" - #,"mmcsm_lig_scaled" , "mmcsm_lig" - ) + #, "consurf_scaled" , "consurf_score" + #, "snap2_scaled" , "snap2_score" + #, "provean_scaled" , "provean_score" +) all_colnames$`colnames(df3)`[grep("outcome", all_colnames$`colnames(df3)`)] -outcome_cols = c("duet_outcome" - , "deepddg_outcome" - , "ddg_dynamut2_outcome" - , "foldx_outcome" - #, "ddg_foldx", "foldx_scaled" - - # consurf outcome doesn't exist - #,"provean_outcome" - #,"snap2_outcome" - #,"ligand_outcome" - #,"mmcsm_lig_outcome" - #, "mcsm_ppi2_outcome" - #, "mcsm_na_outcome" - ) - +outcome_cols = c("duet_outcome" + , "deepddg_outcome" + , "ddg_dynamut2_outcome" + , "foldx_outcome" + #, "ddg_foldx", "foldx_scaled" + , "ligand_outcome" + , "mmcsm_lig_outcome" + , "mcsm_ppi2_outcome" + , "mcsm_na_outcome" + # consurf outcome doesn't exist + #,"provean_outcome" + #,"snap2_outcome" +) cols_to_consider = colnames(df3)[colnames(df3)%in%c(common_cols, scaled_cols,outcome_cols)] cols_to_extract = cols_to_consider[cols_to_consider%in%c(common_cols, outcome_cols)]