sorting the ensemble and priority for ligand affinity

This commit is contained in:
Tanushree Tunstall 2022-08-01 13:36:05 +01:00
parent f3710bfaf5
commit 79c261963b
2 changed files with 66 additions and 59 deletions

View file

@ -1,20 +1,31 @@
#source("~/git/LSHTM_analysis/config/pnca.R") #source("~/git/LSHTM_analysis/config/pnca.R")
source("~/git/LSHTM_analysis/config/alr.R") #source("~/git/LSHTM_analysis/config/alr.R")
source("~/git/LSHTM_analysis/config/gid.R") #source("~/git/LSHTM_analysis/config/gid.R")
#source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/config/katg.R")
source("~/git/LSHTM_analysis/config/rpob.R")
source("/home/tanu/git/LSHTM_analysis/my_header.R") source("/home/tanu/git/LSHTM_analysis/my_header.R")
######################################################### #########################################################
# TASK: Generate averaged stability values # TASK: Generate averaged affinity values
# across all stability tools # across all affinity tools for a given structure
# for a given structure # as applicable...
######################################################### #########################################################
#======= #=======
# output # output
#======= #=======
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene)) outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene))
outfile_mean_ens_st_aff = paste0(outdir_images, "/", tolower(gene)
, "_mean_ens_stab_aff.csv") #OutFile1
print(paste0("Output file:", outfile_mean_ens_st_aff)) outfile_mean_aff = paste0(outdir_images, "/", tolower(gene)
, "_mean_affinity_all.csv")
print(paste0("Output file:", outfile_mean_aff))
#OutFile2
outfile_mean_aff_priorty = paste0(outdir_images, "/", tolower(gene)
, "_mean_affinity_priority.csv")
print(paste0("Output file:", outfile_mean_aff_priorty))
#%%=============================================================== #%%===============================================================
@ -53,48 +64,49 @@ common_cols = c("mutationinformation"
, "sensitivity" , "sensitivity"
, "ligand_distance") , "ligand_distance")
# ADD the ones for mcsm_na etc
#optional_cols = c()
all_colnames$`colnames(df3)`[grep("scaled", all_colnames$`colnames(df3)`)] all_colnames$`colnames(df3)`[grep("scaled", all_colnames$`colnames(df3)`)]
#TODO: affinity_cols
scaled_cols = c("duet_scaled" , "duet_stability_change" scaled_cols = c("duet_scaled" , "duet_stability_change"
,"deepddg_scaled" , "deepddg" , "deepddg_scaled" , "deepddg"
,"ddg_dynamut2_scaled" , "ddg_dynamut2" , "ddg_dynamut2_scaled" , "ddg_dynamut2"
,"foldx_scaled" , "ddg_foldx" , "foldx_scaled" , "ddg_foldx"
, "affinity_scaled" , "ligand_affinity_change"
, "mmcsm_lig_scaled" , "mmcsm_lig"
, "mcsm_ppi2_scaled" , "mcsm_ppi2_affinity" , "mcsm_ppi2_scaled" , "mcsm_ppi2_affinity"
, "mcsm_na_scaled" , "mcsm_na_affinity" , "mcsm_na_scaled" , "mcsm_na_affinity"
#,"consurf_scaled" , "consurf_score" #, "consurf_scaled" , "consurf_score"
#,"snap2_scaled" , "snap2_score" #, "snap2_scaled" , "snap2_score"
#,"provean_scaled" , "provean_score" #, "provean_scaled" , "provean_score"
#,"affinity_scaled" , "ligand_affinity_change"
#,"mmcsm_lig_scaled" , "mmcsm_lig"
) )
all_colnames$`colnames(df3)`[grep("outcome", all_colnames$`colnames(df3)`)] all_colnames$`colnames(df3)`[grep("outcome", all_colnames$`colnames(df3)`)]
outcome_cols = c("duet_outcome" outcome_cols_aff = c("duet_outcome"
, "deepddg_outcome" , "deepddg_outcome"
, "ddg_dynamut2_outcome" , "ddg_dynamut2_outcome"
, "foldx_outcome" , "foldx_outcome"
#, "ddg_foldx", "foldx_scaled" #, "ddg_foldx", "foldx_scaled"
, "ligand_outcome"
, "mmcsm_lig_outcome"
, "mcsm_ppi2_outcome"
, "mcsm_na_outcome"
# consurf outcome doesn't exist # consurf outcome doesn't exist
#,"provean_outcome" #,"provean_outcome"
#,"snap2_outcome" #,"snap2_outcome"
#,"ligand_outcome"
#,"mmcsm_lig_outcome"
#, "mcsm_ppi2_outcome"
#, "mcsm_na_outcome"
) )
cols_to_consider = colnames(df3)[colnames(df3)%in%c(common_cols
, scaled_cols
, outcome_cols_aff)]
cols_to_extract = cols_to_consider[cols_to_consider%in%c(common_cols
, outcome_cols_aff)]
############################################################## ##############################################################
##################### #####################
# Ensemble stability # Ensemble affinity
##################### #####################
# extract outcome cols and map the categories to numeric values # extract outcome cols and map the categories to numeric values
# Destabilising == 0, and stabilising == 1 # Destabilising == 0, and stabilising == 1
df3_plot = df3[, cols_to_extract] df3_plot = df3[, cols_to_extract]
df3_plot[, outcome_cols] <- sapply(df3_plot[, outcome_cols] df3_plot[, outcome_cols_aff] <- sapply(df3_plot[, outcome_cols_aff]
, function(x){ifelse(x == "Destabilising", 0, 1)}) , function(x){ifelse(x == "Destabilising", 0, 1)})
#===================================== #=====================================
@ -104,7 +116,7 @@ df3_plot[, outcome_cols] <- sapply(df3_plot[, outcome_cols]
# column to average: ens_stability # column to average: ens_stability
#===================================== #=====================================
cols_to_average = which(colnames(df3_plot)%in%outcome_cols) cols_to_average = which(colnames(df3_plot)%in%outcome_cols_aff)
# ensemble average across predictors # ensemble average across predictors
df3_plot$ens_stability = rowMeans(df3_plot[,cols_to_average]) df3_plot$ens_stability = rowMeans(df3_plot[,cols_to_average])
@ -166,8 +178,8 @@ outcome_cols_affinity = colnames(df3)[colnames(df3)%in%affinity_outcome_colnames
outcome_cols_affinity = c("ligand_outcome" outcome_cols_affinity = c("ligand_outcome"
,"mmcsm_lig_outcome") ,"mmcsm_lig_outcome")
cols_to_consider = colnames(df3)[colnames(df3)%in%c(common_cols, scaled_cols, outcome_cols, outcome_cols_affinity)] cols_to_consider = colnames(df3)[colnames(df3)%in%c(common_cols, scaled_cols, outcome_cols_aff, outcome_cols_affinity)]
cols_to_extract = cols_to_consider[cols_to_consider%in%c(common_cols, outcome_cols)] cols_to_extract = cols_to_consider[cols_to_consider%in%c(common_cols, outcome_cols_aff)]
foo = df3[, cols_to_consider] foo = df3[, cols_to_consider]
df3_plot_orig = df3[, cols_to_extract] df3_plot_orig = df3[, cols_to_extract]

View file

@ -1,4 +1,4 @@
source("~/git/LSHTM_analysis/config/pnca.R") #source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/alr.R") #source("~/git/LSHTM_analysis/config/alr.R")
#source("~/git/LSHTM_analysis/config/gid.R") #source("~/git/LSHTM_analysis/config/gid.R")
#source("~/git/LSHTM_analysis/config/embb.R") #source("~/git/LSHTM_analysis/config/embb.R")
@ -57,38 +57,33 @@ common_cols = c("mutationinformation"
, "sensitivity" , "sensitivity"
, "ligand_distance") , "ligand_distance")
# ADD the ones for mcsm_na etc
#optional_cols = c()
all_colnames$`colnames(df3)`[grep("scaled", all_colnames$`colnames(df3)`)] all_colnames$`colnames(df3)`[grep("scaled", all_colnames$`colnames(df3)`)]
scaled_cols = c("duet_scaled" , "duet_stability_change" scaled_cols = c("duet_scaled" , "duet_stability_change"
,"deepddg_scaled" , "deepddg" , "deepddg_scaled" , "deepddg"
,"ddg_dynamut2_scaled" , "ddg_dynamut2" , "ddg_dynamut2_scaled" , "ddg_dynamut2"
,"foldx_scaled" , "ddg_foldx" , "foldx_scaled" , "ddg_foldx"
, "affinity_scaled" , "ligand_affinity_change"
, "mmcsm_lig_scaled" , "mmcsm_lig"
, "mcsm_ppi2_scaled" , "mcsm_ppi2_affinity" , "mcsm_ppi2_scaled" , "mcsm_ppi2_affinity"
, "mcsm_na_scaled" , "mcsm_na_affinity" , "mcsm_na_scaled" , "mcsm_na_affinity"
#,"consurf_scaled" , "consurf_score" #, "consurf_scaled" , "consurf_score"
#,"snap2_scaled" , "snap2_score" #, "snap2_scaled" , "snap2_score"
#,"provean_scaled" , "provean_score" #, "provean_scaled" , "provean_score"
#,"affinity_scaled" , "ligand_affinity_change" )
#,"mmcsm_lig_scaled" , "mmcsm_lig"
)
all_colnames$`colnames(df3)`[grep("outcome", all_colnames$`colnames(df3)`)] all_colnames$`colnames(df3)`[grep("outcome", all_colnames$`colnames(df3)`)]
outcome_cols = c("duet_outcome" outcome_cols_aff = c("duet_outcome"
, "deepddg_outcome" , "deepddg_outcome"
, "ddg_dynamut2_outcome" , "ddg_dynamut2_outcome"
, "foldx_outcome" , "foldx_outcome"
#, "ddg_foldx", "foldx_scaled" #, "ddg_foldx", "foldx_scaled"
, "ligand_outcome"
# consurf outcome doesn't exist , "mmcsm_lig_outcome"
#,"provean_outcome" , "mcsm_ppi2_outcome"
#,"snap2_outcome" , "mcsm_na_outcome"
#,"ligand_outcome" # consurf outcome doesn't exist
#,"mmcsm_lig_outcome" #,"provean_outcome"
#, "mcsm_ppi2_outcome" #,"snap2_outcome"
#, "mcsm_na_outcome" )
)
cols_to_consider = colnames(df3)[colnames(df3)%in%c(common_cols, scaled_cols,outcome_cols)] cols_to_consider = colnames(df3)[colnames(df3)%in%c(common_cols, scaled_cols,outcome_cols)]
cols_to_extract = cols_to_consider[cols_to_consider%in%c(common_cols, outcome_cols)] cols_to_extract = cols_to_consider[cols_to_consider%in%c(common_cols, outcome_cols)]