LSHTM_analysis/scripts/plotting/plotting_thesis/linage_dist_ens_stability.R

138 lines
5.2 KiB
R

#!/usr/bin/env Rscript
#########################################################
# TASK: Lineage dist plots for stability:
# average the four tools
# func from : lineage_dist.R
# plotdf
# , x_axis = "duet_scaled"
# , y_axis = "lineage_labels"
# , x_lab = "DUET"
# , all_lineages = F
# , use_lineages = c("L1", "L2", "L3", "L4")
# , with_facet = F
# , facet_wrap_var = "" # FIXME: document what this is for
# , fill_categ = "mutation_info_labels"
# , fill_categ_cols = c("#E69F00", "#999999")
# , my_ats = 15 # axis text size
# , my_als = 20 # axis label size
# , my_leg_ts = 16
# , my_leg_title = 16
# , my_strip_ts = 20
# , leg_pos = c(0.8, 0.9)
# , leg_pos_wf = c("top", "left", "bottom", "right")
# , leg_dir_wf = c("horizontal", "vertical")
# , leg_label = ""
#########################################################
#=======
# output
#=======
outdir_images = paste0("~/git/Writing/thesis/images/results/"
, tolower(gene), "/")
cat("plots will output to:", outdir_images)
#########################################################
#=======
# Data
#=======
df2 = merged_df2
#==================================
# PREFORMATTING: for consistency
# IMPORTANT for calculating effects
#==================================
head(df2$ddg_foldx)
df2['ddg_foldxC'] = abs(df2$ddg_foldx)
head(df2['ddg_foldxC'])
# reverse signs for foldx scaled values for consistency with other tools
df2['foldx_scaled_signC'] = abs(df2$foldx_scaled)
# remove the old ones from
rm_foldx_cols = c("ddg_foldx","foldx_scaled")
raw_cols_stab_revised = raw_cols_stability[!raw_cols_stability%in%rm_foldx_cols]
raw_cols_stab_revised = c(raw_cols_stab_revised,"ddg_foldxC")
scaled_cols_stab_revised = scaled_cols_stability[!scaled_cols_stability%in%rm_foldx_cols]
scaled_cols_stab_revised = c(scaled_cols_stab_revised, "foldx_scaled_signC")
#=================
# PREFORMATTING: for consistency
#=================
df2$sensitivity = ifelse(df2$dst_mode == 1, "R", "S")
table(df2$sensitivity)
cols_to_extract = colnames(df2)[colnames(df2)%in%c(common_cols
, outcome_cols_stability
, raw_cols_stability
, scaled_cols_stability
, raw_cols_stab_revised
, scaled_cols_stab_revised
, "lineage","lineage_labels")]
df2_plot = df2[, cols_to_extract]
all(table(df2_plot$lineage) == table(df2_plot$lineage_labels))
# find which stability cols to average: should contain revised foldx
if ("foldx_scaled_signC"%in%colnames(df2_plot)){
cat("\nPASS: finding stability cols to average")
cols2avg_new = which(colnames(df2_plot)%in%scaled_cols_stab_revised)
}else{
stop("\nAbort: Foldx column has opposing sign. Can't proceed to avergae.")
}
# ensemble average across predictors
df2_plot['ens_stab_new'] = rowMeans(df2_plot[, cols2avg_new])
head(df2_plot$position); head(df2_plot$mutationinformation)
table(df2_plot['ens_stab_new'])
# scaling average values
df2_plot["ens_stab_new_scaled"] = lapply(df2_plot["ens_stab_new"]
, function(x) {
scales::rescale_mid(x
, to = c(-1,1)
, from = c( min(df2_plot["ens_stab_new"])
, max(df2_plot["ens_stab_new"]))
, mid = 0
#, from = c(0,1))
)})
min(df2_plot['ens_stab_new']); max(df2_plot['ens_stab_new'])
foo = df2_plot[c("cols2avg_new", "ens_stab_new_scaled")]
min(df2_plot['ens_stab_new_scaled']); max(df2_plot['ens_stab_new_scaled'])
###########################################################
#====================
# Output Lineage plot
#====================
linD_ens_stabP = paste0(outdir_images
, tolower(gene)
,"_linD_ens_stabP.svg")
cat("\nOutput plot:", linD_ens_stabP)
svg(linD_ens_stabP, width = 10, height = 10)
linP_dm_om = lineage_distP(df2_plot
, with_facet = F
, x_axis = "ens_stab_new_scaled"
, y_axis = "lineage_labels"
, x_lab = "Average stability"
#, fill_categ = "mutation_info_orig", fill_categ_cols = c("#E69F00", "#999999")
, fill_categ = "sensitivity"
, fill_categ_cols = c("red", "blue")
, label_categories = c("Resistant", "Sensitive")
, leg_label = ""
, my_ats = 22 # axis text size
, my_als = 22 # axis label size
, my_leg_ts = 22
, my_leg_title = 22
, my_strip_ts = 22
, alpha = 0.56
)
linP_dm_om
dev.off()