LSHTM_analysis/scripts/plotting/plotting_thesis/linage_dist_ens_stability.R

186 lines
No EOL
6.5 KiB
R

#!/usr/bin/env Rscript
#########################################################
# TASK: Lineage dist plots for stability:
# average the four tools
# func from : lineage_dist.R
# plotdf
# , x_axis = "duet_scaled"
# , y_axis = "lineage_labels"
# , x_lab = "DUET"
# , all_lineages = F
# , use_lineages = c("L1", "L2", "L3", "L4")
# , with_facet = F
# , facet_wrap_var = "" # FIXME: document what this is for
# , fill_categ = "mutation_info_labels"
# , fill_categ_cols = c("#E69F00", "#999999")
# , my_ats = 15 # axis text size
# , my_als = 20 # axis label size
# , my_leg_ts = 16
# , my_leg_title = 16
# , my_strip_ts = 20
# , leg_pos = c(0.8, 0.9)
# , leg_pos_wf = c("top", "left", "bottom", "right")
# , leg_dir_wf = c("horizontal", "vertical")
# , leg_label = ""
#########################################################
#=======
# output
#=======
outdir_images = paste0("~/git/Writing/thesis/images/results/"
, tolower(gene), "/")
cat("plots will output to:", outdir_images)
#########################################################
#=======
# Data
#=======
df2 = merged_df2
#==================================
# PREFORMATTING: for consistency
# IMPORTANT for calculating effects
#==================================
head(df2$ddg_foldx)
df2['ddg_foldxC'] = abs(df2$ddg_foldx)
head(df2['ddg_foldxC'])
# reverse signs for foldx scaled values for consistency with other tools
df2['foldx_scaled_signC'] = abs(df2$foldx_scaled)
# remove the old ones from
rm_foldx_cols = c("ddg_foldx","foldx_scaled")
raw_cols_stab_revised = raw_cols_stability[!raw_cols_stability%in%rm_foldx_cols]
raw_cols_stab_revised = c(raw_cols_stab_revised,"ddg_foldxC")
scaled_cols_stab_revised = scaled_cols_stability[!scaled_cols_stability%in%rm_foldx_cols]
scaled_cols_stab_revised = c(scaled_cols_stab_revised, "foldx_scaled_signC")
#=================
# PREFORMATTING: for consistency
#=================
cols_to_extract = colnames(df2)[colnames(df2)%in%c(common_cols
, outcome_cols_stability
, raw_cols_stability
, scaled_cols_stability
, raw_cols_stab_revised
, scaled_cols_stab_revised
, "lineage","lineage_labels")]
df2_plot = df2[, cols_to_extract]
all(table(df2_plot$lineage) == table(df2_plot$lineage_labels))
# find which stability cols to average: should contain revised foldx
if ("foldx_scaled_signC"%in%colnames(df2_plot)){
cat("\nPASS: finding stability cols to average")
cols2avg_new = which(colnames(df2_plot)%in%scaled_cols_stab_revised)
}else{
stop("\nAbort: Foldx column has opposing sign. Can't proceed to avergae.")
}
# ensemble average across predictors
df2_plot['ens_stab_new'] = rowMeans(df2_plot[, cols2avg_new])
head(df2_plot$position); head(df2_plot$mutationinformation)
table(df2_plot['ens_stab_new'])
# scaling average values
df2_plot["ens_stab_new_scaled"] = lapply(df2_plot["ens_stab_new"]
, function(x) {
scales::rescale_mid(x
, to = c(-1,1)
, from = c( min(df2_plot["ens_stab_new"])
, max(df2_plot["ens_stab_new"]))
, mid = 0
#, from = c(0,1))
)})
min(df2_plot['ens_stab_new']); max(df2_plot['ens_stab_new'])
foo = df2_plot[c("cols2avg_new", "ens_stab_new_scaled"),]
min(df2_plot['ens_stab_new_scaled']); max(df2_plot['ens_stab_new_scaled'])
###########################################################
nrow(df2_plot)
table(df2_plot$lineage)
table(df2_plot$lineage_labels)
#===============
#Quick numbers checks
#===============
nsample_lin = df2_plot[df2_plot$lineage%in%c("L1", "L2", "L3", "L4"),]
if ( all(table(nsample_lin$sensitivity)== table(nsample_lin$mutation_info_labels)) ){
cat("\nTotal no. of samples belonging to L1-l4 for", gene,":", nrow(nsample_lin)
, "\nCounting R and S samples")
if( sum(table(nsample_lin$sensitivity)) == nrow(nsample_lin) ){
cat("\nPASSNumbers cross checked:")
print(table(nsample_lin$sensitivity))
}
}else{
stop("Abort: Numbers mismatch. Please check")
}
#====================
# Output Lineage plot
#====================
my_xlabel = paste0("Average stability ", "(", stability_suffix, ")"); my_xlabel
# linD_ens_stabP = paste0(outdir_images
# , tolower(gene)
# ,"_linD_ens_stabP.svg")
#
# cat("\nOutput plot:", linD_ens_stabP)
# svg(linD_ens_stabP, width = 10, height = 10)
linP_dm_om = lineage_distP(df2_plot
, with_facet = F
, x_axis = "ens_stab_new_scaled"
, y_axis = "lineage_labels"
, x_lab = my_xlabel
, use_lineages = c("L1", "L2", "L3", "L4")
#, fill_categ = "mutation_info_orig", fill_categ_cols = c("#E69F00", "#999999")
, fill_categ = "sensitivity"
, fill_categ_cols = c("red", "blue")
, label_categories = c("Resistant", "Sensitive")
, leg_label = "Mutation group"
, my_ats = 22 # axis text size
, my_als = 22 # axis label size
, my_leg_ts = 22
, my_leg_title = 22
, my_strip_ts = 22
, alpha = 0.56
)
linP_dm_om
#dev.off()
###########################################
my_label_size = 25
linPlots_combined = paste0(outdir_images
, tolower(gene)
,"_linP_combined.svg")
cat("\nOutput plot:", linPlots_combined)
svg(linPlots_combined, width = 18, height = 12)
cowplot::plot_grid(
cowplot::plot_grid(lin_countP, lin_diversityP
, nrow = 2
# , ncols = 2
, labels = "AUTO"
, label_size = my_label_size),
NULL,
linP_dm_om,
nrow = 1,
labels = c("", "", "C"),
label_size = my_label_size,
rel_widths = c(35, 3, 52)
)
dev.off()