added lineage plots in one

This commit is contained in:
Tanushree Tunstall 2022-08-10 12:58:38 +01:00
parent 285b28b1d6
commit f6a3b7f066
4 changed files with 177 additions and 261 deletions

View file

@ -0,0 +1,172 @@
#!/usr/bin/env Rscript
#########################################################
# TASK: Lineage plots [merged_df2]
# Count
# Diversity
# Average stability dist
# Avergae affinity dist: optional
#########################################################
#=======
# output
#=======
# outdir_images = paste0("~/git/Writing/thesis/images/results/"
# , tolower(gene), "/")
# cat("plots will output to:", outdir_images)
#########################################################
#===============
#Quick numbers checks
#===============
nsample_lin = merged_df2[merged_df2$lineage%in%c("L1", "L2", "L3", "L4"),]
if ( all(table(nsample_lin$sensitivity)== table(nsample_lin$mutation_info_labels)) ){
cat("\nTotal no. of samples belonging to L1-l4 for", gene,":", nrow(nsample_lin)
, "\nCounting R and S samples")
if( sum(table(nsample_lin$sensitivity)) == nrow(nsample_lin) ){
cat("\nPASSNumbers cross checked:")
print(table(nsample_lin$sensitivity))
}
}else{
stop("Abort: Numbers mismatch. Please check")
}
########################################################################
###################################################
# Lineage barplots #
###################################################
#===============================
# lineage sample and SNP count
#===============================
lin_countP = lin_count_bp(lf_data = lineage_dfL[['lin_lf']]
, all_lineages = F
, x_categ = "sel_lineages"
, y_count = "p_count"
, use_lineages = c("L1", "L2", "L3", "L4")
, bar_fill_categ = "count_categ"
, display_label_col = "p_count"
, bar_stat_stype = "identity"
, d_lab_size = 8
, d_lab_col = "black"
, my_xats = 25 # x axis text size
, my_yats = 25 # y axis text sized_lab_size
, my_xals = 25 # x axis label size
, my_yals = 25 # y axis label size
, my_lls = 25 # legend label size
, bar_col_labels = c("SNPs", "Total Samples")
, bar_col_values = c("grey50", "gray75")
, bar_leg_name = ""
, leg_location = "top"
, y_log10 = F
, y_scale_percent = FALSE
, y_label = c("Count")
)
#===============================
# lineage SNP diversity count
#===============================
lin_diversityP = lin_count_bp_diversity(lf_data = lineage_dfL[['lin_wf']]
, x_categ = "sel_lineages"
, y_count = "snp_diversity"
#, all_lineages = F
, use_lineages = c("L1", "L2", "L3", "L4")
, display_label_col = "snp_diversity_f"
, bar_stat_stype = "identity"
, x_lab_angle = 90
, d_lab_size =9
, my_xats = 25 # x axis text size
, my_yats = 25 # y axis text size
, my_xals = 25 # x axis label size
, my_yals = 25 # y axis label size
, my_lls = 25 # legend label size
, y_log10 = F
, y_scale_percent = F
, leg_location = "top"
, y_label = "Percent" #"SNP diversity"
, bp_plot_title = "nsSNP diversity"
, title_colour = "black" #"chocolate4"
, subtitle_text = NULL
, sts = 20
, subtitle_colour = "#350E20FF")
#=============================================
# Output plots: Lineage count and Diversity
#=============================================
# lineage_bp_CL = paste0(outdir_images
# ,tolower(gene)
# ,"_lineage_bp_CL_Tall.svg")
#
# cat("Lineage barplots:", lineage_bp_CL)
# svg(lineage_bp_CL, width = 8, height = 15)
#
# cowplot::plot_grid(lin_countP, lin_diversityP
# #, labels = c("(a)", "(b)", "(c)", "(d)")
# , nrow = 2
# # , ncols = 2
# , labels = "AUTO"
# , label_size = 25)
#
# dev.off()
########################################################################
###################################################
# Stability dist #
###################################################
# scaled_cols_stability = c("duet_scaled"
# , "deepddg_scaled"
# , "ddg_dynamut2_scaled"
# , "foldx_scaled"
# , "avg_stability_scaled")
my_xlabel = paste0("Average stability ", "(", stability_suffix, ")"); my_xlabel
#plotdf = merged_df2[merged_df2$lineage%in%c("L1", "L2", "L3", "L4"),]
linP_dm_om = lineage_distP(merged_df2
, with_facet = F
, x_axis = "avg_stability_scaled"
, y_axis = "lineage_labels"
, x_lab = my_xlabel
, use_lineages = c("L1", "L2", "L3", "L4")
#, fill_categ = "mutation_info_orig", fill_categ_cols = c("#E69F00", "#999999")
, fill_categ = "sensitivity"
, fill_categ_cols = c("red", "blue")
, label_categories = c("Resistant", "Sensitive")
, leg_label = "Mutation group"
, my_ats = 22 # axis text size
, my_als = 22 # axis label size
, my_leg_ts = 22
, my_leg_title = 22
, my_strip_ts = 22
, alpha = 0.56
)
linP_dm_om
###################################################
# Affinity dist [OPTIONAL] #
###################################################
# scaled_cols_affinity = c("affinity_scaled"
# , "mmcsm_lig_scaled"
# , "mcsm_ppi2_scaled"
# , "mcsm_na_scaled"
# , "avg_affinity_scaled")
# lineage_distP(merged_df2
# , with_facet = F
# , x_axis = "avg_affinity_scaled"
# , y_axis = "lineage_labels"
# , x_lab = my_xlabel
# , use_lineages = c("L1", "L2", "L3", "L4")
# #, fill_categ = "mutation_info_orig", fill_categ_cols = c("#E69F00", "#999999")
# , fill_categ = "sensitivity"
# , fill_categ_cols = c("red", "blue")
# , label_categories = c("Resistant", "Sensitive")
# , leg_label = "Mutation group"
# , my_ats = 22 # axis text size
# , my_als = 22 # axis label size
# , my_leg_ts = 22
# , my_leg_title = 22
# , my_strip_ts = 22
# , alpha = 0.56
# )

View file

@ -1,5 +1,10 @@
#!/usr/bin/env Rscript
###########################################
# TASK: generate plots for lineage
# Individual plots in
#lineage_bp_both.R
#linage_dist_ens_stability.R
###########################################
my_label_size = 25

View file

@ -1,186 +0,0 @@
#!/usr/bin/env Rscript
#########################################################
# TASK: Lineage dist plots for stability:
# average the four tools
# func from : lineage_dist.R
# plotdf
# , x_axis = "duet_scaled"
# , y_axis = "lineage_labels"
# , x_lab = "DUET"
# , all_lineages = F
# , use_lineages = c("L1", "L2", "L3", "L4")
# , with_facet = F
# , facet_wrap_var = "" # FIXME: document what this is for
# , fill_categ = "mutation_info_labels"
# , fill_categ_cols = c("#E69F00", "#999999")
# , my_ats = 15 # axis text size
# , my_als = 20 # axis label size
# , my_leg_ts = 16
# , my_leg_title = 16
# , my_strip_ts = 20
# , leg_pos = c(0.8, 0.9)
# , leg_pos_wf = c("top", "left", "bottom", "right")
# , leg_dir_wf = c("horizontal", "vertical")
# , leg_label = ""
#########################################################
#=======
# output
#=======
outdir_images = paste0("~/git/Writing/thesis/images/results/"
, tolower(gene), "/")
cat("plots will output to:", outdir_images)
#########################################################
#=======
# Data
#=======
df2 = merged_df2
#==================================
# PREFORMATTING: for consistency
# IMPORTANT for calculating effects
#==================================
head(df2$ddg_foldx)
df2['ddg_foldxC'] = abs(df2$ddg_foldx)
head(df2['ddg_foldxC'])
# reverse signs for foldx scaled values for consistency with other tools
df2['foldx_scaled_signC'] = abs(df2$foldx_scaled)
# remove the old ones from
rm_foldx_cols = c("ddg_foldx","foldx_scaled")
raw_cols_stab_revised = raw_cols_stability[!raw_cols_stability%in%rm_foldx_cols]
raw_cols_stab_revised = c(raw_cols_stab_revised,"ddg_foldxC")
scaled_cols_stab_revised = scaled_cols_stability[!scaled_cols_stability%in%rm_foldx_cols]
scaled_cols_stab_revised = c(scaled_cols_stab_revised, "foldx_scaled_signC")
#=================
# PREFORMATTING: for consistency
#=================
cols_to_extract = colnames(df2)[colnames(df2)%in%c(common_cols
, outcome_cols_stability
, raw_cols_stability
, scaled_cols_stability
, raw_cols_stab_revised
, scaled_cols_stab_revised
, "lineage","lineage_labels")]
df2_plot = df2[, cols_to_extract]
all(table(df2_plot$lineage) == table(df2_plot$lineage_labels))
# find which stability cols to average: should contain revised foldx
if ("foldx_scaled_signC"%in%colnames(df2_plot)){
cat("\nPASS: finding stability cols to average")
cols2avg_new = which(colnames(df2_plot)%in%scaled_cols_stab_revised)
}else{
stop("\nAbort: Foldx column has opposing sign. Can't proceed to avergae.")
}
# ensemble average across predictors
df2_plot['ens_stab_new'] = rowMeans(df2_plot[, cols2avg_new])
head(df2_plot$position); head(df2_plot$mutationinformation)
table(df2_plot['ens_stab_new'])
# scaling average values
df2_plot["ens_stab_new_scaled"] = lapply(df2_plot["ens_stab_new"]
, function(x) {
scales::rescale_mid(x
, to = c(-1,1)
, from = c( min(df2_plot["ens_stab_new"])
, max(df2_plot["ens_stab_new"]))
, mid = 0
#, from = c(0,1))
)})
min(df2_plot['ens_stab_new']); max(df2_plot['ens_stab_new'])
foo = df2_plot[c("cols2avg_new", "ens_stab_new_scaled"),]
min(df2_plot['ens_stab_new_scaled']); max(df2_plot['ens_stab_new_scaled'])
###########################################################
nrow(df2_plot)
table(df2_plot$lineage)
table(df2_plot$lineage_labels)
#===============
#Quick numbers checks
#===============
nsample_lin = df2_plot[df2_plot$lineage%in%c("L1", "L2", "L3", "L4"),]
if ( all(table(nsample_lin$sensitivity)== table(nsample_lin$mutation_info_labels)) ){
cat("\nTotal no. of samples belonging to L1-l4 for", gene,":", nrow(nsample_lin)
, "\nCounting R and S samples")
if( sum(table(nsample_lin$sensitivity)) == nrow(nsample_lin) ){
cat("\nPASSNumbers cross checked:")
print(table(nsample_lin$sensitivity))
}
}else{
stop("Abort: Numbers mismatch. Please check")
}
#====================
# Output Lineage plot
#====================
my_xlabel = paste0("Average stability ", "(", stability_suffix, ")"); my_xlabel
# linD_ens_stabP = paste0(outdir_images
# , tolower(gene)
# ,"_linD_ens_stabP.svg")
#
# cat("\nOutput plot:", linD_ens_stabP)
# svg(linD_ens_stabP, width = 10, height = 10)
linP_dm_om = lineage_distP(df2_plot
, with_facet = F
, x_axis = "ens_stab_new_scaled"
, y_axis = "lineage_labels"
, x_lab = my_xlabel
, use_lineages = c("L1", "L2", "L3", "L4")
#, fill_categ = "mutation_info_orig", fill_categ_cols = c("#E69F00", "#999999")
, fill_categ = "sensitivity"
, fill_categ_cols = c("red", "blue")
, label_categories = c("Resistant", "Sensitive")
, leg_label = "Mutation group"
, my_ats = 22 # axis text size
, my_als = 22 # axis label size
, my_leg_ts = 22
, my_leg_title = 22
, my_strip_ts = 22
, alpha = 0.56
)
linP_dm_om
#dev.off()
###########################################
my_label_size = 25
linPlots_combined = paste0(outdir_images
, tolower(gene)
,"_linP_combined.svg")
cat("\nOutput plot:", linPlots_combined)
svg(linPlots_combined, width = 18, height = 12)
cowplot::plot_grid(
cowplot::plot_grid(lin_countP, lin_diversityP
, nrow = 2
# , ncols = 2
, labels = "AUTO"
, label_size = my_label_size),
NULL,
linP_dm_om,
nrow = 1,
labels = c("", "", "C"),
label_size = my_label_size,
rel_widths = c(35, 3, 52)
)
dev.off()

View file

@ -1,75 +0,0 @@
# get plotting_dfs()
# using dfs [lf and wf] from lineage_dfL
# FIXME: add SNP diversity as the title
#===============================
# lineage sample and SNP count
#===============================
lin_countP = lin_count_bp(lf_data = lineage_dfL[['lin_lf']]
, all_lineages = F
, x_categ = "sel_lineages"
, y_count = "p_count"
, use_lineages = c("L1", "L2", "L3", "L4")
, bar_fill_categ = "count_categ"
, display_label_col = "p_count"
, bar_stat_stype = "identity"
, d_lab_size = 8
, d_lab_col = "black"
, my_xats = 25 # x axis text size
, my_yats = 25 # y axis text sized_lab_size
, my_xals = 25 # x axis label size
, my_yals = 25 # y axis label size
, my_lls = 25 # legend label size
, bar_col_labels = c("SNPs", "Total Samples")
, bar_col_values = c("grey50", "gray75")
, bar_leg_name = ""
, leg_location = "top"
, y_log10 = F
, y_scale_percent = FALSE
, y_label = c("Count")
)
#===============================
# lineage SNP diversity count
#===============================
lin_diversityP = lin_count_bp_diversity(lf_data = lineage_dfL[['lin_wf']]
, x_categ = "sel_lineages"
, y_count = "snp_diversity"
#, all_lineages = F
, use_lineages = c("L1", "L2", "L3", "L4")
, display_label_col = "snp_diversity_f"
, bar_stat_stype = "identity"
, x_lab_angle = 90
, d_lab_size =9
, my_xats = 25 # x axis text size
, my_yats = 25 # y axis text size
, my_xals = 25 # x axis label size
, my_yals = 25 # y axis label size
, my_lls = 25 # legend label size
, y_log10 = F
, y_scale_percent = F
, leg_location = "top"
, y_label = "Percent" #"SNP diversity"
, bp_plot_title = "nsSNP diversity"
, title_colour = "black" #"chocolate4"
, subtitle_text = NULL
, sts = 20
, subtitle_colour = "#350E20FF")
#=============================================
# Output plots: Lineage count and Diversity
#=============================================
# lineage_bp_CL = paste0(outdir_images
# ,tolower(gene)
# ,"_lineage_bp_CL_Tall.svg")
#
# cat("Lineage barplots:", lineage_bp_CL)
# svg(lineage_bp_CL, width = 8, height = 15)
#
# cowplot::plot_grid(lin_countP, lin_diversityP
# #, labels = c("(a)", "(b)", "(c)", "(d)")
# , nrow = 2
# # , ncols = 2
# , labels = "AUTO"
# , label_size = 25)
#
# dev.off()