# LSHTM_analysis/scripts/plotting/structure_figures/mcsm_mean_stability_ensemble.R
#
# 74 lines
# 3.2 KiB
# R

#source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/alr.R")
#source("~/git/LSHTM_analysis/config/gid.R")
source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/config/katg.R")
#source("~/git/LSHTM_analysis/config/rpob.R")
#########################################################
# TASK: Generate averaged stability values by position
# calculated across all stability tools
# for a given structure
#########################################################
#=======
# output
#=======
# Results go to the per-gene thesis image directory. `gene` is not defined in
# this script; it is presumably set by the config script sourced above.
outdir_images <- paste0("~/git/Writing/thesis/images/results/", tolower(gene))

# CSV (inside that directory) that receives the per-position ensemble means.
outfile_mean_ens_st_aff <- paste0(
  outdir_images,
  "/",
  tolower(gene),
  "_mean_ens_stability.csv"
)

print(paste0("Output file:", outfile_mean_ens_st_aff))
#%%===============================================================
#=============
# Input: merged_df3
#=============
# merged_df3 is not created in this script; it is expected to be available
# once the plotting-data script below has been sourced.
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
#merged_df3= paste0("/home/tanu/git/Data/", drug, "/output/", tolower(gene), "_merged_df3.csv")

# Keep only the mutation label, its position and the averaged stability score.
cols_to_extract_ms <- c(
  "mutationinformation",
  "position",
  "avg_stability_scaled"
)
df3 <- merged_df3[, cols_to_extract_ms]

# Quick check: number of mutation entries carried forward.
length(df3[["mutationinformation"]])
# Ensemble average of predictors by position: collapse the per-mutation
# scores in df3 to a single mean value per position.
# NOTE(review): mean() is called without na.rm, so one NA score makes the
# whole position NA -- confirm that this is intended.
avg_stability_by_position <- dplyr::summarise(
  dplyr::group_by(df3, position),
  avg_stability_scaled_pos = mean(avg_stability_scaled)
)

# Smallest and largest per-position mean, as a quick range check.
min(avg_stability_by_position[["avg_stability_scaled_pos"]])
max(avg_stability_by_position[["avg_stability_scaled_pos"]])
# Add a rescaled copy of the per-position mean: input range c(0, 1), midpoint
# 0, output range c(-1, 1).
# NOTE(review): with these arguments the mapping appears to be an identity
# (the equality check that follows the summary print seems to test exactly
# that) -- confirm against the scales::rescale_mid documentation.
avg_stability_by_position[["avg_stability_scaled_pos_scaled"]] <- scales::rescale_mid(
  avg_stability_by_position[["avg_stability_scaled_pos"]],
  to = c(-1, 1),
  mid = 0,
  from = c(0, 1)
  # alternative input range kept from the original, currently disabled:
  # from = c(en_stab_min, en_stab_max)
)
# Show the first few raw and rescaled per-position means.
# The columns are extracted as plain vectors with [[ ]] and printed with
# print(): handing a one-column data frame to paste0() coerces it to a single
# deparsed "c(...)" string instead of showing the values.
cat("Average stability scores:\n")
print(head(avg_stability_by_position[["avg_stability_scaled_pos"]]))
cat("---------------------------------------------------------------",
    "\nAverage stability scaled scores:\n",
    sep = "")
print(head(avg_stability_by_position[["avg_stability_scaled_pos_scaled"]]))
# Sanity check: is every rescaled value equal to its raw per-position mean?
all(
  avg_stability_by_position[["avg_stability_scaled_pos"]] ==
    avg_stability_by_position[["avg_stability_scaled_pos_scaled"]]
)

# convert to a data frame before writing it out
avg_stability_by_position <- as.data.frame(avg_stability_by_position)
##################################################################
# output
#write.csv(combined_df, outfile_mean_ens_st_aff
# The destination directory depends on the gene, so it may not exist yet for
# a newly configured gene; create it first so the write does not fail.
if (!dir.exists(dirname(outfile_mean_ens_st_aff))) {
  dir.create(dirname(outfile_mean_ens_st_aff), recursive = TRUE)
}

# Write the per-position table without row names. FALSE is spelled out
# because `F` is an ordinary variable that sourced scripts could reassign.
write.csv(
  avg_stability_by_position,
  outfile_mean_ens_st_aff,
  row.names = FALSE
)

# Report where the file went and the dimensions of the table written.
cat("Finished writing file:\n"
    , outfile_mean_ens_st_aff
    , "\nNo. of rows:", nrow(avg_stability_by_position)
    , "\nNo. of cols:", ncol(avg_stability_by_position))
# end of script
#===============================================================