141 lines
5.3 KiB
R
141 lines
5.3 KiB
R
#!/usr/bin/env Rscript
|
|
|
|
#########################################################
|
|
# TASK: Lineage dist plots for stability:
|
|
# average the four tools
|
|
|
|
# func from : lineage_dist.R
|
|
# plotdf
|
|
# , x_axis = "duet_scaled"
|
|
# , y_axis = "lineage_labels"
|
|
# , x_lab = "DUET"
|
|
# , all_lineages = F
|
|
# , use_lineages = c("L1", "L2", "L3", "L4")
|
|
# , with_facet = F
|
|
# , facet_wrap_var = "" # FIXME: document what this is for
|
|
# , fill_categ = "mutation_info_labels"
|
|
# , fill_categ_cols = c("#E69F00", "#999999")
|
|
# , my_ats = 15 # axis text size
|
|
# , my_als = 20 # axis label size
|
|
# , my_leg_ts = 16
|
|
# , my_leg_title = 16
|
|
# , my_strip_ts = 20
|
|
# , leg_pos = c(0.8, 0.9)
|
|
# , leg_pos_wf = c("top", "left", "bottom", "right")
|
|
# , leg_dir_wf = c("horizontal", "vertical")
|
|
# , leg_label = ""
|
|
#########################################################
|
|
|
|
#=======
|
|
# output
|
|
#=======
|
|
outdir_images = paste0("~/git/Writing/thesis/images/results/"
|
|
, tolower(gene), "/")
|
|
cat("plots will output to:", outdir_images)
|
|
#########################################################
|
|
#=======
|
|
# Data
|
|
#=======
|
|
df2 = merged_df2
|
|
|
|
#==================================
|
|
# PREFORMATTING: for consistency
|
|
# IMPORTANT for calculating effects
|
|
#==================================
|
|
head(df2$ddg_foldx)
|
|
df2['ddg_foldxC'] = abs(df2$ddg_foldx)
|
|
head(df2['ddg_foldxC'])
|
|
|
|
# reverse signs for foldx scaled values for consistency with other tools
|
|
df2['foldx_scaled_signC'] = abs(df2$foldx_scaled)
|
|
|
|
# remove the old ones from
|
|
rm_foldx_cols = c("ddg_foldx","foldx_scaled")
|
|
raw_cols_stab_revised = raw_cols_stability[!raw_cols_stability%in%rm_foldx_cols]
|
|
raw_cols_stab_revised = c(raw_cols_stab_revised,"ddg_foldxC")
|
|
|
|
scaled_cols_stab_revised = scaled_cols_stability[!scaled_cols_stability%in%rm_foldx_cols]
|
|
scaled_cols_stab_revised = c(scaled_cols_stab_revised, "foldx_scaled_signC")
|
|
|
|
|
|
#=================
|
|
# PREFORMATTING: for consistency
|
|
#=================
|
|
df2$sensitivity = ifelse(df2$dst_mode == 1, "R", "S")
|
|
table(df2$sensitivity)
|
|
|
|
cols_to_extract = colnames(df2)[colnames(df2)%in%c(common_cols
|
|
, outcome_cols_stability
|
|
, raw_cols_stability
|
|
, scaled_cols_stability
|
|
, raw_cols_stab_revised
|
|
, scaled_cols_stab_revised
|
|
, "lineage","lineage_labels")]
|
|
|
|
df2_plot = df2[, cols_to_extract]
|
|
|
|
all(table(df2_plot$lineage) == table(df2_plot$lineage_labels))
|
|
|
|
# find which stability cols to average: should contain revised foldx
|
|
if ("foldx_scaled_signC"%in%colnames(df2_plot)){
|
|
cat("\nPASS: finding stability cols to average")
|
|
cols2avg_new = which(colnames(df2_plot)%in%scaled_cols_stab_revised)
|
|
}else{
|
|
stop("\nAbort: Foldx column has opposing sign. Can't proceed to avergae.")
|
|
}
|
|
|
|
# ensemble average across predictors
|
|
df2_plot['ens_stab_new'] = rowMeans(df2_plot[, cols2avg_new])
|
|
|
|
head(df2_plot$position); head(df2_plot$mutationinformation)
|
|
table(df2_plot['ens_stab_new'])
|
|
|
|
# scaling average values
|
|
df2_plot["ens_stab_new_scaled"] = lapply(df2_plot["ens_stab_new"]
|
|
, function(x) {
|
|
scales::rescale_mid(x
|
|
, to = c(-1,1)
|
|
, from = c( min(df2_plot["ens_stab_new"])
|
|
, max(df2_plot["ens_stab_new"]))
|
|
, mid = 0
|
|
#, from = c(0,1))
|
|
)})
|
|
|
|
min(df2_plot['ens_stab_new']); max(df2_plot['ens_stab_new'])
|
|
foo = df2_plot[c("cols2avg_new", "ens_stab_new_scaled")]
|
|
min(df2_plot['ens_stab_new_scaled']); max(df2_plot['ens_stab_new_scaled'])
|
|
|
|
###########################################################
|
|
#====================
|
|
# Output Lineage plot
|
|
#====================
|
|
nsample_lin = merged_df2[merged_df2$lineage%in%c("L1", "L2", "L3", "L4"),]
|
|
cat("\nTotal no. of samples belonging to L1-l4 for", gene,":", nrow(nsample_lin) )
|
|
|
|
linD_ens_stabP = paste0(outdir_images
|
|
, tolower(gene)
|
|
,"_linD_ens_stabP.svg")
|
|
|
|
cat("\nOutput plot:", linD_ens_stabP)
|
|
svg(linD_ens_stabP, width = 10, height = 10)
|
|
|
|
linP_dm_om = lineage_distP(df2_plot
|
|
, with_facet = F
|
|
, x_axis = "ens_stab_new_scaled"
|
|
, y_axis = "lineage_labels"
|
|
, x_lab = "Average stability"
|
|
#, fill_categ = "mutation_info_orig", fill_categ_cols = c("#E69F00", "#999999")
|
|
, fill_categ = "sensitivity"
|
|
, fill_categ_cols = c("red", "blue")
|
|
, label_categories = c("Resistant", "Sensitive")
|
|
, leg_label = ""
|
|
, my_ats = 22 # axis text size
|
|
, my_als = 22 # axis label size
|
|
, my_leg_ts = 22
|
|
, my_leg_title = 22
|
|
, my_strip_ts = 22
|
|
, alpha = 0.56
|
|
)
|
|
|
|
linP_dm_om
|
|
dev.off()
|