added lineage plots in one
This commit is contained in:
parent
285b28b1d6
commit
f6a3b7f066
4 changed files with 177 additions and 261 deletions
172
scripts/plotting/plotting_thesis/linage_bp_dist.R
Normal file
172
scripts/plotting/plotting_thesis/linage_bp_dist.R
Normal file
|
@ -0,0 +1,172 @@
|
||||||
|
#!/usr/bin/env Rscript
|
||||||
|
|
||||||
|
#########################################################
|
||||||
|
# TASK: Lineage plots [merged_df2]
|
||||||
|
# Count
|
||||||
|
# Diversity
|
||||||
|
# Average stability dist
|
||||||
|
# Avergae affinity dist: optional
|
||||||
|
#########################################################
|
||||||
|
#=======
|
||||||
|
# output
|
||||||
|
#=======
|
||||||
|
# outdir_images = paste0("~/git/Writing/thesis/images/results/"
|
||||||
|
# , tolower(gene), "/")
|
||||||
|
# cat("plots will output to:", outdir_images)
|
||||||
|
#########################################################
|
||||||
|
|
||||||
|
#===============
|
||||||
|
#Quick numbers checks
|
||||||
|
#===============
|
||||||
|
nsample_lin = merged_df2[merged_df2$lineage%in%c("L1", "L2", "L3", "L4"),]
|
||||||
|
|
||||||
|
if ( all(table(nsample_lin$sensitivity)== table(nsample_lin$mutation_info_labels)) ){
|
||||||
|
cat("\nTotal no. of samples belonging to L1-l4 for", gene,":", nrow(nsample_lin)
|
||||||
|
, "\nCounting R and S samples")
|
||||||
|
if( sum(table(nsample_lin$sensitivity)) == nrow(nsample_lin) ){
|
||||||
|
cat("\nPASSNumbers cross checked:")
|
||||||
|
print(table(nsample_lin$sensitivity))
|
||||||
|
}
|
||||||
|
}else{
|
||||||
|
stop("Abort: Numbers mismatch. Please check")
|
||||||
|
}
|
||||||
|
########################################################################
|
||||||
|
###################################################
|
||||||
|
# Lineage barplots #
|
||||||
|
###################################################
|
||||||
|
|
||||||
|
#===============================
|
||||||
|
# lineage sample and SNP count
|
||||||
|
#===============================
|
||||||
|
lin_countP = lin_count_bp(lf_data = lineage_dfL[['lin_lf']]
|
||||||
|
, all_lineages = F
|
||||||
|
, x_categ = "sel_lineages"
|
||||||
|
, y_count = "p_count"
|
||||||
|
, use_lineages = c("L1", "L2", "L3", "L4")
|
||||||
|
, bar_fill_categ = "count_categ"
|
||||||
|
, display_label_col = "p_count"
|
||||||
|
, bar_stat_stype = "identity"
|
||||||
|
, d_lab_size = 8
|
||||||
|
, d_lab_col = "black"
|
||||||
|
, my_xats = 25 # x axis text size
|
||||||
|
, my_yats = 25 # y axis text sized_lab_size
|
||||||
|
, my_xals = 25 # x axis label size
|
||||||
|
, my_yals = 25 # y axis label size
|
||||||
|
, my_lls = 25 # legend label size
|
||||||
|
, bar_col_labels = c("SNPs", "Total Samples")
|
||||||
|
, bar_col_values = c("grey50", "gray75")
|
||||||
|
, bar_leg_name = ""
|
||||||
|
, leg_location = "top"
|
||||||
|
, y_log10 = F
|
||||||
|
, y_scale_percent = FALSE
|
||||||
|
, y_label = c("Count")
|
||||||
|
)
|
||||||
|
|
||||||
|
#===============================
|
||||||
|
# lineage SNP diversity count
|
||||||
|
#===============================
|
||||||
|
lin_diversityP = lin_count_bp_diversity(lf_data = lineage_dfL[['lin_wf']]
|
||||||
|
, x_categ = "sel_lineages"
|
||||||
|
, y_count = "snp_diversity"
|
||||||
|
#, all_lineages = F
|
||||||
|
, use_lineages = c("L1", "L2", "L3", "L4")
|
||||||
|
, display_label_col = "snp_diversity_f"
|
||||||
|
, bar_stat_stype = "identity"
|
||||||
|
, x_lab_angle = 90
|
||||||
|
, d_lab_size =9
|
||||||
|
, my_xats = 25 # x axis text size
|
||||||
|
, my_yats = 25 # y axis text size
|
||||||
|
, my_xals = 25 # x axis label size
|
||||||
|
, my_yals = 25 # y axis label size
|
||||||
|
, my_lls = 25 # legend label size
|
||||||
|
, y_log10 = F
|
||||||
|
, y_scale_percent = F
|
||||||
|
, leg_location = "top"
|
||||||
|
, y_label = "Percent" #"SNP diversity"
|
||||||
|
, bp_plot_title = "nsSNP diversity"
|
||||||
|
, title_colour = "black" #"chocolate4"
|
||||||
|
, subtitle_text = NULL
|
||||||
|
, sts = 20
|
||||||
|
, subtitle_colour = "#350E20FF")
|
||||||
|
|
||||||
|
#=============================================
|
||||||
|
# Output plots: Lineage count and Diversity
|
||||||
|
#=============================================
|
||||||
|
# lineage_bp_CL = paste0(outdir_images
|
||||||
|
# ,tolower(gene)
|
||||||
|
# ,"_lineage_bp_CL_Tall.svg")
|
||||||
|
#
|
||||||
|
# cat("Lineage barplots:", lineage_bp_CL)
|
||||||
|
# svg(lineage_bp_CL, width = 8, height = 15)
|
||||||
|
#
|
||||||
|
# cowplot::plot_grid(lin_countP, lin_diversityP
|
||||||
|
# #, labels = c("(a)", "(b)", "(c)", "(d)")
|
||||||
|
# , nrow = 2
|
||||||
|
# # , ncols = 2
|
||||||
|
# , labels = "AUTO"
|
||||||
|
# , label_size = 25)
|
||||||
|
#
|
||||||
|
# dev.off()
|
||||||
|
########################################################################
|
||||||
|
|
||||||
|
|
||||||
|
###################################################
|
||||||
|
# Stability dist #
|
||||||
|
###################################################
|
||||||
|
# scaled_cols_stability = c("duet_scaled"
|
||||||
|
# , "deepddg_scaled"
|
||||||
|
# , "ddg_dynamut2_scaled"
|
||||||
|
# , "foldx_scaled"
|
||||||
|
# , "avg_stability_scaled")
|
||||||
|
|
||||||
|
my_xlabel = paste0("Average stability ", "(", stability_suffix, ")"); my_xlabel
|
||||||
|
#plotdf = merged_df2[merged_df2$lineage%in%c("L1", "L2", "L3", "L4"),]
|
||||||
|
|
||||||
|
linP_dm_om = lineage_distP(merged_df2
|
||||||
|
, with_facet = F
|
||||||
|
, x_axis = "avg_stability_scaled"
|
||||||
|
, y_axis = "lineage_labels"
|
||||||
|
, x_lab = my_xlabel
|
||||||
|
, use_lineages = c("L1", "L2", "L3", "L4")
|
||||||
|
#, fill_categ = "mutation_info_orig", fill_categ_cols = c("#E69F00", "#999999")
|
||||||
|
, fill_categ = "sensitivity"
|
||||||
|
, fill_categ_cols = c("red", "blue")
|
||||||
|
, label_categories = c("Resistant", "Sensitive")
|
||||||
|
, leg_label = "Mutation group"
|
||||||
|
, my_ats = 22 # axis text size
|
||||||
|
, my_als = 22 # axis label size
|
||||||
|
, my_leg_ts = 22
|
||||||
|
, my_leg_title = 22
|
||||||
|
, my_strip_ts = 22
|
||||||
|
, alpha = 0.56
|
||||||
|
)
|
||||||
|
|
||||||
|
linP_dm_om
|
||||||
|
|
||||||
|
###################################################
|
||||||
|
# Affinity dist [OPTIONAL] #
|
||||||
|
###################################################
|
||||||
|
# scaled_cols_affinity = c("affinity_scaled"
|
||||||
|
# , "mmcsm_lig_scaled"
|
||||||
|
# , "mcsm_ppi2_scaled"
|
||||||
|
# , "mcsm_na_scaled"
|
||||||
|
# , "avg_affinity_scaled")
|
||||||
|
|
||||||
|
# lineage_distP(merged_df2
|
||||||
|
# , with_facet = F
|
||||||
|
# , x_axis = "avg_affinity_scaled"
|
||||||
|
# , y_axis = "lineage_labels"
|
||||||
|
# , x_lab = my_xlabel
|
||||||
|
# , use_lineages = c("L1", "L2", "L3", "L4")
|
||||||
|
# #, fill_categ = "mutation_info_orig", fill_categ_cols = c("#E69F00", "#999999")
|
||||||
|
# , fill_categ = "sensitivity"
|
||||||
|
# , fill_categ_cols = c("red", "blue")
|
||||||
|
# , label_categories = c("Resistant", "Sensitive")
|
||||||
|
# , leg_label = "Mutation group"
|
||||||
|
# , my_ats = 22 # axis text size
|
||||||
|
# , my_als = 22 # axis label size
|
||||||
|
# , my_leg_ts = 22
|
||||||
|
# , my_leg_title = 22
|
||||||
|
# , my_strip_ts = 22
|
||||||
|
# , alpha = 0.56
|
||||||
|
# )
|
|
@ -1,5 +1,10 @@
|
||||||
#!/usr/bin/env Rscript
|
#!/usr/bin/env Rscript
|
||||||
|
|
||||||
|
###########################################
|
||||||
|
# TASK: generate plots for lineage
|
||||||
|
# Individual plots in
|
||||||
|
#lineage_bp_both.R
|
||||||
|
#linage_dist_ens_stability.R
|
||||||
###########################################
|
###########################################
|
||||||
my_label_size = 25
|
my_label_size = 25
|
||||||
|
|
||||||
|
|
|
@ -1,186 +0,0 @@
|
||||||
#!/usr/bin/env Rscript
|
|
||||||
|
|
||||||
#########################################################
|
|
||||||
# TASK: Lineage dist plots for stability:
|
|
||||||
# average the four tools
|
|
||||||
|
|
||||||
# func from : lineage_dist.R
|
|
||||||
# plotdf
|
|
||||||
# , x_axis = "duet_scaled"
|
|
||||||
# , y_axis = "lineage_labels"
|
|
||||||
# , x_lab = "DUET"
|
|
||||||
# , all_lineages = F
|
|
||||||
# , use_lineages = c("L1", "L2", "L3", "L4")
|
|
||||||
# , with_facet = F
|
|
||||||
# , facet_wrap_var = "" # FIXME: document what this is for
|
|
||||||
# , fill_categ = "mutation_info_labels"
|
|
||||||
# , fill_categ_cols = c("#E69F00", "#999999")
|
|
||||||
# , my_ats = 15 # axis text size
|
|
||||||
# , my_als = 20 # axis label size
|
|
||||||
# , my_leg_ts = 16
|
|
||||||
# , my_leg_title = 16
|
|
||||||
# , my_strip_ts = 20
|
|
||||||
# , leg_pos = c(0.8, 0.9)
|
|
||||||
# , leg_pos_wf = c("top", "left", "bottom", "right")
|
|
||||||
# , leg_dir_wf = c("horizontal", "vertical")
|
|
||||||
# , leg_label = ""
|
|
||||||
#########################################################
|
|
||||||
|
|
||||||
#=======
|
|
||||||
# output
|
|
||||||
#=======
|
|
||||||
outdir_images = paste0("~/git/Writing/thesis/images/results/"
|
|
||||||
, tolower(gene), "/")
|
|
||||||
cat("plots will output to:", outdir_images)
|
|
||||||
#########################################################
|
|
||||||
#=======
|
|
||||||
# Data
|
|
||||||
#=======
|
|
||||||
df2 = merged_df2
|
|
||||||
|
|
||||||
#==================================
|
|
||||||
# PREFORMATTING: for consistency
|
|
||||||
# IMPORTANT for calculating effects
|
|
||||||
#==================================
|
|
||||||
head(df2$ddg_foldx)
|
|
||||||
df2['ddg_foldxC'] = abs(df2$ddg_foldx)
|
|
||||||
head(df2['ddg_foldxC'])
|
|
||||||
|
|
||||||
# reverse signs for foldx scaled values for consistency with other tools
|
|
||||||
df2['foldx_scaled_signC'] = abs(df2$foldx_scaled)
|
|
||||||
|
|
||||||
# remove the old ones from
|
|
||||||
rm_foldx_cols = c("ddg_foldx","foldx_scaled")
|
|
||||||
raw_cols_stab_revised = raw_cols_stability[!raw_cols_stability%in%rm_foldx_cols]
|
|
||||||
raw_cols_stab_revised = c(raw_cols_stab_revised,"ddg_foldxC")
|
|
||||||
|
|
||||||
scaled_cols_stab_revised = scaled_cols_stability[!scaled_cols_stability%in%rm_foldx_cols]
|
|
||||||
scaled_cols_stab_revised = c(scaled_cols_stab_revised, "foldx_scaled_signC")
|
|
||||||
|
|
||||||
|
|
||||||
#=================
|
|
||||||
# PREFORMATTING: for consistency
|
|
||||||
#=================
|
|
||||||
cols_to_extract = colnames(df2)[colnames(df2)%in%c(common_cols
|
|
||||||
, outcome_cols_stability
|
|
||||||
, raw_cols_stability
|
|
||||||
, scaled_cols_stability
|
|
||||||
, raw_cols_stab_revised
|
|
||||||
, scaled_cols_stab_revised
|
|
||||||
, "lineage","lineage_labels")]
|
|
||||||
|
|
||||||
df2_plot = df2[, cols_to_extract]
|
|
||||||
|
|
||||||
all(table(df2_plot$lineage) == table(df2_plot$lineage_labels))
|
|
||||||
|
|
||||||
# find which stability cols to average: should contain revised foldx
|
|
||||||
if ("foldx_scaled_signC"%in%colnames(df2_plot)){
|
|
||||||
cat("\nPASS: finding stability cols to average")
|
|
||||||
cols2avg_new = which(colnames(df2_plot)%in%scaled_cols_stab_revised)
|
|
||||||
}else{
|
|
||||||
stop("\nAbort: Foldx column has opposing sign. Can't proceed to avergae.")
|
|
||||||
}
|
|
||||||
|
|
||||||
# ensemble average across predictors
|
|
||||||
df2_plot['ens_stab_new'] = rowMeans(df2_plot[, cols2avg_new])
|
|
||||||
|
|
||||||
head(df2_plot$position); head(df2_plot$mutationinformation)
|
|
||||||
table(df2_plot['ens_stab_new'])
|
|
||||||
|
|
||||||
# scaling average values
|
|
||||||
df2_plot["ens_stab_new_scaled"] = lapply(df2_plot["ens_stab_new"]
|
|
||||||
, function(x) {
|
|
||||||
scales::rescale_mid(x
|
|
||||||
, to = c(-1,1)
|
|
||||||
, from = c( min(df2_plot["ens_stab_new"])
|
|
||||||
, max(df2_plot["ens_stab_new"]))
|
|
||||||
, mid = 0
|
|
||||||
#, from = c(0,1))
|
|
||||||
)})
|
|
||||||
|
|
||||||
min(df2_plot['ens_stab_new']); max(df2_plot['ens_stab_new'])
|
|
||||||
foo = df2_plot[c("cols2avg_new", "ens_stab_new_scaled"),]
|
|
||||||
min(df2_plot['ens_stab_new_scaled']); max(df2_plot['ens_stab_new_scaled'])
|
|
||||||
|
|
||||||
###########################################################
|
|
||||||
nrow(df2_plot)
|
|
||||||
table(df2_plot$lineage)
|
|
||||||
table(df2_plot$lineage_labels)
|
|
||||||
|
|
||||||
|
|
||||||
#===============
|
|
||||||
#Quick numbers checks
|
|
||||||
#===============
|
|
||||||
nsample_lin = df2_plot[df2_plot$lineage%in%c("L1", "L2", "L3", "L4"),]
|
|
||||||
|
|
||||||
if ( all(table(nsample_lin$sensitivity)== table(nsample_lin$mutation_info_labels)) ){
|
|
||||||
cat("\nTotal no. of samples belonging to L1-l4 for", gene,":", nrow(nsample_lin)
|
|
||||||
, "\nCounting R and S samples")
|
|
||||||
if( sum(table(nsample_lin$sensitivity)) == nrow(nsample_lin) ){
|
|
||||||
cat("\nPASSNumbers cross checked:")
|
|
||||||
print(table(nsample_lin$sensitivity))
|
|
||||||
}
|
|
||||||
}else{
|
|
||||||
stop("Abort: Numbers mismatch. Please check")
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#====================
|
|
||||||
# Output Lineage plot
|
|
||||||
#====================
|
|
||||||
|
|
||||||
my_xlabel = paste0("Average stability ", "(", stability_suffix, ")"); my_xlabel
|
|
||||||
|
|
||||||
# linD_ens_stabP = paste0(outdir_images
|
|
||||||
# , tolower(gene)
|
|
||||||
# ,"_linD_ens_stabP.svg")
|
|
||||||
#
|
|
||||||
# cat("\nOutput plot:", linD_ens_stabP)
|
|
||||||
# svg(linD_ens_stabP, width = 10, height = 10)
|
|
||||||
|
|
||||||
linP_dm_om = lineage_distP(df2_plot
|
|
||||||
, with_facet = F
|
|
||||||
, x_axis = "ens_stab_new_scaled"
|
|
||||||
, y_axis = "lineage_labels"
|
|
||||||
, x_lab = my_xlabel
|
|
||||||
, use_lineages = c("L1", "L2", "L3", "L4")
|
|
||||||
#, fill_categ = "mutation_info_orig", fill_categ_cols = c("#E69F00", "#999999")
|
|
||||||
, fill_categ = "sensitivity"
|
|
||||||
, fill_categ_cols = c("red", "blue")
|
|
||||||
, label_categories = c("Resistant", "Sensitive")
|
|
||||||
, leg_label = "Mutation group"
|
|
||||||
, my_ats = 22 # axis text size
|
|
||||||
, my_als = 22 # axis label size
|
|
||||||
, my_leg_ts = 22
|
|
||||||
, my_leg_title = 22
|
|
||||||
, my_strip_ts = 22
|
|
||||||
, alpha = 0.56
|
|
||||||
)
|
|
||||||
|
|
||||||
linP_dm_om
|
|
||||||
#dev.off()
|
|
||||||
|
|
||||||
###########################################
|
|
||||||
my_label_size = 25
|
|
||||||
|
|
||||||
linPlots_combined = paste0(outdir_images
|
|
||||||
, tolower(gene)
|
|
||||||
,"_linP_combined.svg")
|
|
||||||
|
|
||||||
cat("\nOutput plot:", linPlots_combined)
|
|
||||||
svg(linPlots_combined, width = 18, height = 12)
|
|
||||||
|
|
||||||
cowplot::plot_grid(
|
|
||||||
cowplot::plot_grid(lin_countP, lin_diversityP
|
|
||||||
, nrow = 2
|
|
||||||
# , ncols = 2
|
|
||||||
, labels = "AUTO"
|
|
||||||
, label_size = my_label_size),
|
|
||||||
NULL,
|
|
||||||
linP_dm_om,
|
|
||||||
nrow = 1,
|
|
||||||
labels = c("", "", "C"),
|
|
||||||
label_size = my_label_size,
|
|
||||||
rel_widths = c(35, 3, 52)
|
|
||||||
)
|
|
||||||
dev.off()
|
|
|
@ -1,75 +0,0 @@
|
||||||
# get plotting_dfs()
|
|
||||||
# using dfs [lf and wf] from lineage_dfL
|
|
||||||
# FIXME: add SNP diversity as the title
|
|
||||||
#===============================
|
|
||||||
# lineage sample and SNP count
|
|
||||||
#===============================
|
|
||||||
lin_countP = lin_count_bp(lf_data = lineage_dfL[['lin_lf']]
|
|
||||||
, all_lineages = F
|
|
||||||
, x_categ = "sel_lineages"
|
|
||||||
, y_count = "p_count"
|
|
||||||
, use_lineages = c("L1", "L2", "L3", "L4")
|
|
||||||
, bar_fill_categ = "count_categ"
|
|
||||||
, display_label_col = "p_count"
|
|
||||||
, bar_stat_stype = "identity"
|
|
||||||
, d_lab_size = 8
|
|
||||||
, d_lab_col = "black"
|
|
||||||
, my_xats = 25 # x axis text size
|
|
||||||
, my_yats = 25 # y axis text sized_lab_size
|
|
||||||
, my_xals = 25 # x axis label size
|
|
||||||
, my_yals = 25 # y axis label size
|
|
||||||
, my_lls = 25 # legend label size
|
|
||||||
, bar_col_labels = c("SNPs", "Total Samples")
|
|
||||||
, bar_col_values = c("grey50", "gray75")
|
|
||||||
, bar_leg_name = ""
|
|
||||||
, leg_location = "top"
|
|
||||||
, y_log10 = F
|
|
||||||
, y_scale_percent = FALSE
|
|
||||||
, y_label = c("Count")
|
|
||||||
)
|
|
||||||
|
|
||||||
#===============================
|
|
||||||
# lineage SNP diversity count
|
|
||||||
#===============================
|
|
||||||
lin_diversityP = lin_count_bp_diversity(lf_data = lineage_dfL[['lin_wf']]
|
|
||||||
, x_categ = "sel_lineages"
|
|
||||||
, y_count = "snp_diversity"
|
|
||||||
#, all_lineages = F
|
|
||||||
, use_lineages = c("L1", "L2", "L3", "L4")
|
|
||||||
, display_label_col = "snp_diversity_f"
|
|
||||||
, bar_stat_stype = "identity"
|
|
||||||
, x_lab_angle = 90
|
|
||||||
, d_lab_size =9
|
|
||||||
, my_xats = 25 # x axis text size
|
|
||||||
, my_yats = 25 # y axis text size
|
|
||||||
, my_xals = 25 # x axis label size
|
|
||||||
, my_yals = 25 # y axis label size
|
|
||||||
, my_lls = 25 # legend label size
|
|
||||||
, y_log10 = F
|
|
||||||
, y_scale_percent = F
|
|
||||||
, leg_location = "top"
|
|
||||||
, y_label = "Percent" #"SNP diversity"
|
|
||||||
, bp_plot_title = "nsSNP diversity"
|
|
||||||
, title_colour = "black" #"chocolate4"
|
|
||||||
, subtitle_text = NULL
|
|
||||||
, sts = 20
|
|
||||||
, subtitle_colour = "#350E20FF")
|
|
||||||
|
|
||||||
#=============================================
|
|
||||||
# Output plots: Lineage count and Diversity
|
|
||||||
#=============================================
|
|
||||||
# lineage_bp_CL = paste0(outdir_images
|
|
||||||
# ,tolower(gene)
|
|
||||||
# ,"_lineage_bp_CL_Tall.svg")
|
|
||||||
#
|
|
||||||
# cat("Lineage barplots:", lineage_bp_CL)
|
|
||||||
# svg(lineage_bp_CL, width = 8, height = 15)
|
|
||||||
#
|
|
||||||
# cowplot::plot_grid(lin_countP, lin_diversityP
|
|
||||||
# #, labels = c("(a)", "(b)", "(c)", "(d)")
|
|
||||||
# , nrow = 2
|
|
||||||
# # , ncols = 2
|
|
||||||
# , labels = "AUTO"
|
|
||||||
# , label_size = 25)
|
|
||||||
#
|
|
||||||
# dev.off()
|
|
Loading…
Add table
Add a link
Reference in a new issue