added lineage plots in one
This commit is contained in:
parent
285b28b1d6
commit
f6a3b7f066
4 changed files with 177 additions and 261 deletions
172
scripts/plotting/plotting_thesis/linage_bp_dist.R
Normal file
172
scripts/plotting/plotting_thesis/linage_bp_dist.R
Normal file
|
@ -0,0 +1,172 @@
|
|||
#!/usr/bin/env Rscript
|
||||
|
||||
#########################################################
|
||||
# TASK: Lineage plots [merged_df2]
|
||||
# Count
|
||||
# Diversity
|
||||
# Average stability dist
|
||||
# Avergae affinity dist: optional
|
||||
#########################################################
|
||||
#=======
|
||||
# output
|
||||
#=======
|
||||
# outdir_images = paste0("~/git/Writing/thesis/images/results/"
|
||||
# , tolower(gene), "/")
|
||||
# cat("plots will output to:", outdir_images)
|
||||
#########################################################
|
||||
|
||||
#===============
|
||||
#Quick numbers checks
|
||||
#===============
|
||||
nsample_lin = merged_df2[merged_df2$lineage%in%c("L1", "L2", "L3", "L4"),]
|
||||
|
||||
if ( all(table(nsample_lin$sensitivity)== table(nsample_lin$mutation_info_labels)) ){
|
||||
cat("\nTotal no. of samples belonging to L1-l4 for", gene,":", nrow(nsample_lin)
|
||||
, "\nCounting R and S samples")
|
||||
if( sum(table(nsample_lin$sensitivity)) == nrow(nsample_lin) ){
|
||||
cat("\nPASSNumbers cross checked:")
|
||||
print(table(nsample_lin$sensitivity))
|
||||
}
|
||||
}else{
|
||||
stop("Abort: Numbers mismatch. Please check")
|
||||
}
|
||||
########################################################################
|
||||
###################################################
|
||||
# Lineage barplots #
|
||||
###################################################
|
||||
|
||||
#===============================
|
||||
# lineage sample and SNP count
|
||||
#===============================
|
||||
lin_countP = lin_count_bp(lf_data = lineage_dfL[['lin_lf']]
|
||||
, all_lineages = F
|
||||
, x_categ = "sel_lineages"
|
||||
, y_count = "p_count"
|
||||
, use_lineages = c("L1", "L2", "L3", "L4")
|
||||
, bar_fill_categ = "count_categ"
|
||||
, display_label_col = "p_count"
|
||||
, bar_stat_stype = "identity"
|
||||
, d_lab_size = 8
|
||||
, d_lab_col = "black"
|
||||
, my_xats = 25 # x axis text size
|
||||
, my_yats = 25 # y axis text sized_lab_size
|
||||
, my_xals = 25 # x axis label size
|
||||
, my_yals = 25 # y axis label size
|
||||
, my_lls = 25 # legend label size
|
||||
, bar_col_labels = c("SNPs", "Total Samples")
|
||||
, bar_col_values = c("grey50", "gray75")
|
||||
, bar_leg_name = ""
|
||||
, leg_location = "top"
|
||||
, y_log10 = F
|
||||
, y_scale_percent = FALSE
|
||||
, y_label = c("Count")
|
||||
)
|
||||
|
||||
#===============================
|
||||
# lineage SNP diversity count
|
||||
#===============================
|
||||
lin_diversityP = lin_count_bp_diversity(lf_data = lineage_dfL[['lin_wf']]
|
||||
, x_categ = "sel_lineages"
|
||||
, y_count = "snp_diversity"
|
||||
#, all_lineages = F
|
||||
, use_lineages = c("L1", "L2", "L3", "L4")
|
||||
, display_label_col = "snp_diversity_f"
|
||||
, bar_stat_stype = "identity"
|
||||
, x_lab_angle = 90
|
||||
, d_lab_size =9
|
||||
, my_xats = 25 # x axis text size
|
||||
, my_yats = 25 # y axis text size
|
||||
, my_xals = 25 # x axis label size
|
||||
, my_yals = 25 # y axis label size
|
||||
, my_lls = 25 # legend label size
|
||||
, y_log10 = F
|
||||
, y_scale_percent = F
|
||||
, leg_location = "top"
|
||||
, y_label = "Percent" #"SNP diversity"
|
||||
, bp_plot_title = "nsSNP diversity"
|
||||
, title_colour = "black" #"chocolate4"
|
||||
, subtitle_text = NULL
|
||||
, sts = 20
|
||||
, subtitle_colour = "#350E20FF")
|
||||
|
||||
#=============================================
|
||||
# Output plots: Lineage count and Diversity
|
||||
#=============================================
|
||||
# lineage_bp_CL = paste0(outdir_images
|
||||
# ,tolower(gene)
|
||||
# ,"_lineage_bp_CL_Tall.svg")
|
||||
#
|
||||
# cat("Lineage barplots:", lineage_bp_CL)
|
||||
# svg(lineage_bp_CL, width = 8, height = 15)
|
||||
#
|
||||
# cowplot::plot_grid(lin_countP, lin_diversityP
|
||||
# #, labels = c("(a)", "(b)", "(c)", "(d)")
|
||||
# , nrow = 2
|
||||
# # , ncols = 2
|
||||
# , labels = "AUTO"
|
||||
# , label_size = 25)
|
||||
#
|
||||
# dev.off()
|
||||
########################################################################
|
||||
|
||||
|
||||
###################################################
|
||||
# Stability dist #
|
||||
###################################################
|
||||
# scaled_cols_stability = c("duet_scaled"
|
||||
# , "deepddg_scaled"
|
||||
# , "ddg_dynamut2_scaled"
|
||||
# , "foldx_scaled"
|
||||
# , "avg_stability_scaled")
|
||||
|
||||
my_xlabel = paste0("Average stability ", "(", stability_suffix, ")"); my_xlabel
|
||||
#plotdf = merged_df2[merged_df2$lineage%in%c("L1", "L2", "L3", "L4"),]
|
||||
|
||||
linP_dm_om = lineage_distP(merged_df2
|
||||
, with_facet = F
|
||||
, x_axis = "avg_stability_scaled"
|
||||
, y_axis = "lineage_labels"
|
||||
, x_lab = my_xlabel
|
||||
, use_lineages = c("L1", "L2", "L3", "L4")
|
||||
#, fill_categ = "mutation_info_orig", fill_categ_cols = c("#E69F00", "#999999")
|
||||
, fill_categ = "sensitivity"
|
||||
, fill_categ_cols = c("red", "blue")
|
||||
, label_categories = c("Resistant", "Sensitive")
|
||||
, leg_label = "Mutation group"
|
||||
, my_ats = 22 # axis text size
|
||||
, my_als = 22 # axis label size
|
||||
, my_leg_ts = 22
|
||||
, my_leg_title = 22
|
||||
, my_strip_ts = 22
|
||||
, alpha = 0.56
|
||||
)
|
||||
|
||||
linP_dm_om
|
||||
|
||||
###################################################
|
||||
# Affinity dist [OPTIONAL] #
|
||||
###################################################
|
||||
# scaled_cols_affinity = c("affinity_scaled"
|
||||
# , "mmcsm_lig_scaled"
|
||||
# , "mcsm_ppi2_scaled"
|
||||
# , "mcsm_na_scaled"
|
||||
# , "avg_affinity_scaled")
|
||||
|
||||
# lineage_distP(merged_df2
|
||||
# , with_facet = F
|
||||
# , x_axis = "avg_affinity_scaled"
|
||||
# , y_axis = "lineage_labels"
|
||||
# , x_lab = my_xlabel
|
||||
# , use_lineages = c("L1", "L2", "L3", "L4")
|
||||
# #, fill_categ = "mutation_info_orig", fill_categ_cols = c("#E69F00", "#999999")
|
||||
# , fill_categ = "sensitivity"
|
||||
# , fill_categ_cols = c("red", "blue")
|
||||
# , label_categories = c("Resistant", "Sensitive")
|
||||
# , leg_label = "Mutation group"
|
||||
# , my_ats = 22 # axis text size
|
||||
# , my_als = 22 # axis label size
|
||||
# , my_leg_ts = 22
|
||||
# , my_leg_title = 22
|
||||
# , my_strip_ts = 22
|
||||
# , alpha = 0.56
|
||||
# )
|
|
@ -1,5 +1,10 @@
|
|||
#!/usr/bin/env Rscript
|
||||
|
||||
###########################################
|
||||
# TASK: generate plots for lineage
|
||||
# Individual plots in
|
||||
#lineage_bp_both.R
|
||||
#linage_dist_ens_stability.R
|
||||
###########################################
|
||||
my_label_size = 25
|
||||
|
||||
|
|
|
@ -1,186 +0,0 @@
|
|||
#!/usr/bin/env Rscript
|
||||
|
||||
#########################################################
|
||||
# TASK: Lineage dist plots for stability:
|
||||
# average the four tools
|
||||
|
||||
# func from : lineage_dist.R
|
||||
# plotdf
|
||||
# , x_axis = "duet_scaled"
|
||||
# , y_axis = "lineage_labels"
|
||||
# , x_lab = "DUET"
|
||||
# , all_lineages = F
|
||||
# , use_lineages = c("L1", "L2", "L3", "L4")
|
||||
# , with_facet = F
|
||||
# , facet_wrap_var = "" # FIXME: document what this is for
|
||||
# , fill_categ = "mutation_info_labels"
|
||||
# , fill_categ_cols = c("#E69F00", "#999999")
|
||||
# , my_ats = 15 # axis text size
|
||||
# , my_als = 20 # axis label size
|
||||
# , my_leg_ts = 16
|
||||
# , my_leg_title = 16
|
||||
# , my_strip_ts = 20
|
||||
# , leg_pos = c(0.8, 0.9)
|
||||
# , leg_pos_wf = c("top", "left", "bottom", "right")
|
||||
# , leg_dir_wf = c("horizontal", "vertical")
|
||||
# , leg_label = ""
|
||||
#########################################################
|
||||
|
||||
#=======
|
||||
# output
|
||||
#=======
|
||||
outdir_images = paste0("~/git/Writing/thesis/images/results/"
|
||||
, tolower(gene), "/")
|
||||
cat("plots will output to:", outdir_images)
|
||||
#########################################################
|
||||
#=======
|
||||
# Data
|
||||
#=======
|
||||
df2 = merged_df2
|
||||
|
||||
#==================================
|
||||
# PREFORMATTING: for consistency
|
||||
# IMPORTANT for calculating effects
|
||||
#==================================
|
||||
head(df2$ddg_foldx)
|
||||
df2['ddg_foldxC'] = abs(df2$ddg_foldx)
|
||||
head(df2['ddg_foldxC'])
|
||||
|
||||
# reverse signs for foldx scaled values for consistency with other tools
|
||||
df2['foldx_scaled_signC'] = abs(df2$foldx_scaled)
|
||||
|
||||
# remove the old ones from
|
||||
rm_foldx_cols = c("ddg_foldx","foldx_scaled")
|
||||
raw_cols_stab_revised = raw_cols_stability[!raw_cols_stability%in%rm_foldx_cols]
|
||||
raw_cols_stab_revised = c(raw_cols_stab_revised,"ddg_foldxC")
|
||||
|
||||
scaled_cols_stab_revised = scaled_cols_stability[!scaled_cols_stability%in%rm_foldx_cols]
|
||||
scaled_cols_stab_revised = c(scaled_cols_stab_revised, "foldx_scaled_signC")
|
||||
|
||||
|
||||
#=================
|
||||
# PREFORMATTING: for consistency
|
||||
#=================
|
||||
cols_to_extract = colnames(df2)[colnames(df2)%in%c(common_cols
|
||||
, outcome_cols_stability
|
||||
, raw_cols_stability
|
||||
, scaled_cols_stability
|
||||
, raw_cols_stab_revised
|
||||
, scaled_cols_stab_revised
|
||||
, "lineage","lineage_labels")]
|
||||
|
||||
df2_plot = df2[, cols_to_extract]
|
||||
|
||||
all(table(df2_plot$lineage) == table(df2_plot$lineage_labels))
|
||||
|
||||
# find which stability cols to average: should contain revised foldx
|
||||
if ("foldx_scaled_signC"%in%colnames(df2_plot)){
|
||||
cat("\nPASS: finding stability cols to average")
|
||||
cols2avg_new = which(colnames(df2_plot)%in%scaled_cols_stab_revised)
|
||||
}else{
|
||||
stop("\nAbort: Foldx column has opposing sign. Can't proceed to avergae.")
|
||||
}
|
||||
|
||||
# ensemble average across predictors
|
||||
df2_plot['ens_stab_new'] = rowMeans(df2_plot[, cols2avg_new])
|
||||
|
||||
head(df2_plot$position); head(df2_plot$mutationinformation)
|
||||
table(df2_plot['ens_stab_new'])
|
||||
|
||||
# scaling average values
|
||||
df2_plot["ens_stab_new_scaled"] = lapply(df2_plot["ens_stab_new"]
|
||||
, function(x) {
|
||||
scales::rescale_mid(x
|
||||
, to = c(-1,1)
|
||||
, from = c( min(df2_plot["ens_stab_new"])
|
||||
, max(df2_plot["ens_stab_new"]))
|
||||
, mid = 0
|
||||
#, from = c(0,1))
|
||||
)})
|
||||
|
||||
min(df2_plot['ens_stab_new']); max(df2_plot['ens_stab_new'])
|
||||
foo = df2_plot[c("cols2avg_new", "ens_stab_new_scaled"),]
|
||||
min(df2_plot['ens_stab_new_scaled']); max(df2_plot['ens_stab_new_scaled'])
|
||||
|
||||
###########################################################
|
||||
nrow(df2_plot)
|
||||
table(df2_plot$lineage)
|
||||
table(df2_plot$lineage_labels)
|
||||
|
||||
|
||||
#===============
|
||||
#Quick numbers checks
|
||||
#===============
|
||||
nsample_lin = df2_plot[df2_plot$lineage%in%c("L1", "L2", "L3", "L4"),]
|
||||
|
||||
if ( all(table(nsample_lin$sensitivity)== table(nsample_lin$mutation_info_labels)) ){
|
||||
cat("\nTotal no. of samples belonging to L1-l4 for", gene,":", nrow(nsample_lin)
|
||||
, "\nCounting R and S samples")
|
||||
if( sum(table(nsample_lin$sensitivity)) == nrow(nsample_lin) ){
|
||||
cat("\nPASSNumbers cross checked:")
|
||||
print(table(nsample_lin$sensitivity))
|
||||
}
|
||||
}else{
|
||||
stop("Abort: Numbers mismatch. Please check")
|
||||
}
|
||||
|
||||
|
||||
#====================
|
||||
# Output Lineage plot
|
||||
#====================
|
||||
|
||||
my_xlabel = paste0("Average stability ", "(", stability_suffix, ")"); my_xlabel
|
||||
|
||||
# linD_ens_stabP = paste0(outdir_images
|
||||
# , tolower(gene)
|
||||
# ,"_linD_ens_stabP.svg")
|
||||
#
|
||||
# cat("\nOutput plot:", linD_ens_stabP)
|
||||
# svg(linD_ens_stabP, width = 10, height = 10)
|
||||
|
||||
linP_dm_om = lineage_distP(df2_plot
|
||||
, with_facet = F
|
||||
, x_axis = "ens_stab_new_scaled"
|
||||
, y_axis = "lineage_labels"
|
||||
, x_lab = my_xlabel
|
||||
, use_lineages = c("L1", "L2", "L3", "L4")
|
||||
#, fill_categ = "mutation_info_orig", fill_categ_cols = c("#E69F00", "#999999")
|
||||
, fill_categ = "sensitivity"
|
||||
, fill_categ_cols = c("red", "blue")
|
||||
, label_categories = c("Resistant", "Sensitive")
|
||||
, leg_label = "Mutation group"
|
||||
, my_ats = 22 # axis text size
|
||||
, my_als = 22 # axis label size
|
||||
, my_leg_ts = 22
|
||||
, my_leg_title = 22
|
||||
, my_strip_ts = 22
|
||||
, alpha = 0.56
|
||||
)
|
||||
|
||||
linP_dm_om
|
||||
#dev.off()
|
||||
|
||||
###########################################
|
||||
my_label_size = 25
|
||||
|
||||
linPlots_combined = paste0(outdir_images
|
||||
, tolower(gene)
|
||||
,"_linP_combined.svg")
|
||||
|
||||
cat("\nOutput plot:", linPlots_combined)
|
||||
svg(linPlots_combined, width = 18, height = 12)
|
||||
|
||||
cowplot::plot_grid(
|
||||
cowplot::plot_grid(lin_countP, lin_diversityP
|
||||
, nrow = 2
|
||||
# , ncols = 2
|
||||
, labels = "AUTO"
|
||||
, label_size = my_label_size),
|
||||
NULL,
|
||||
linP_dm_om,
|
||||
nrow = 1,
|
||||
labels = c("", "", "C"),
|
||||
label_size = my_label_size,
|
||||
rel_widths = c(35, 3, 52)
|
||||
)
|
||||
dev.off()
|
|
@ -1,75 +0,0 @@
|
|||
# get plotting_dfs()
|
||||
# using dfs [lf and wf] from lineage_dfL
|
||||
# FIXME: add SNP diversity as the title
|
||||
#===============================
|
||||
# lineage sample and SNP count
|
||||
#===============================
|
||||
lin_countP = lin_count_bp(lf_data = lineage_dfL[['lin_lf']]
|
||||
, all_lineages = F
|
||||
, x_categ = "sel_lineages"
|
||||
, y_count = "p_count"
|
||||
, use_lineages = c("L1", "L2", "L3", "L4")
|
||||
, bar_fill_categ = "count_categ"
|
||||
, display_label_col = "p_count"
|
||||
, bar_stat_stype = "identity"
|
||||
, d_lab_size = 8
|
||||
, d_lab_col = "black"
|
||||
, my_xats = 25 # x axis text size
|
||||
, my_yats = 25 # y axis text sized_lab_size
|
||||
, my_xals = 25 # x axis label size
|
||||
, my_yals = 25 # y axis label size
|
||||
, my_lls = 25 # legend label size
|
||||
, bar_col_labels = c("SNPs", "Total Samples")
|
||||
, bar_col_values = c("grey50", "gray75")
|
||||
, bar_leg_name = ""
|
||||
, leg_location = "top"
|
||||
, y_log10 = F
|
||||
, y_scale_percent = FALSE
|
||||
, y_label = c("Count")
|
||||
)
|
||||
|
||||
#===============================
|
||||
# lineage SNP diversity count
|
||||
#===============================
|
||||
lin_diversityP = lin_count_bp_diversity(lf_data = lineage_dfL[['lin_wf']]
|
||||
, x_categ = "sel_lineages"
|
||||
, y_count = "snp_diversity"
|
||||
#, all_lineages = F
|
||||
, use_lineages = c("L1", "L2", "L3", "L4")
|
||||
, display_label_col = "snp_diversity_f"
|
||||
, bar_stat_stype = "identity"
|
||||
, x_lab_angle = 90
|
||||
, d_lab_size =9
|
||||
, my_xats = 25 # x axis text size
|
||||
, my_yats = 25 # y axis text size
|
||||
, my_xals = 25 # x axis label size
|
||||
, my_yals = 25 # y axis label size
|
||||
, my_lls = 25 # legend label size
|
||||
, y_log10 = F
|
||||
, y_scale_percent = F
|
||||
, leg_location = "top"
|
||||
, y_label = "Percent" #"SNP diversity"
|
||||
, bp_plot_title = "nsSNP diversity"
|
||||
, title_colour = "black" #"chocolate4"
|
||||
, subtitle_text = NULL
|
||||
, sts = 20
|
||||
, subtitle_colour = "#350E20FF")
|
||||
|
||||
#=============================================
|
||||
# Output plots: Lineage count and Diversity
|
||||
#=============================================
|
||||
# lineage_bp_CL = paste0(outdir_images
|
||||
# ,tolower(gene)
|
||||
# ,"_lineage_bp_CL_Tall.svg")
|
||||
#
|
||||
# cat("Lineage barplots:", lineage_bp_CL)
|
||||
# svg(lineage_bp_CL, width = 8, height = 15)
|
||||
#
|
||||
# cowplot::plot_grid(lin_countP, lin_diversityP
|
||||
# #, labels = c("(a)", "(b)", "(c)", "(d)")
|
||||
# , nrow = 2
|
||||
# # , ncols = 2
|
||||
# , labels = "AUTO"
|
||||
# , label_size = 25)
|
||||
#
|
||||
# dev.off()
|
Loading…
Add table
Add a link
Reference in a new issue