diff --git a/scripts/plotting/plotting_thesis/linage_bp_dist.R b/scripts/plotting/plotting_thesis/linage_bp_dist.R new file mode 100644 index 0000000..38d2d05 --- /dev/null +++ b/scripts/plotting/plotting_thesis/linage_bp_dist.R @@ -0,0 +1,172 @@ +#!/usr/bin/env Rscript + +######################################################### +# TASK: Lineage plots [merged_df2] +# Count +# Diversity +# Average stability dist +# Avergae affinity dist: optional +######################################################### +#======= +# output +#======= +# outdir_images = paste0("~/git/Writing/thesis/images/results/" +# , tolower(gene), "/") +# cat("plots will output to:", outdir_images) +######################################################### + +#=============== +#Quick numbers checks +#=============== +nsample_lin = merged_df2[merged_df2$lineage%in%c("L1", "L2", "L3", "L4"),] + +if ( all(table(nsample_lin$sensitivity)== table(nsample_lin$mutation_info_labels)) ){ + cat("\nTotal no. of samples belonging to L1-l4 for", gene,":", nrow(nsample_lin) + , "\nCounting R and S samples") + if( sum(table(nsample_lin$sensitivity)) == nrow(nsample_lin) ){ + cat("\nPASSNumbers cross checked:") + print(table(nsample_lin$sensitivity)) + } +}else{ + stop("Abort: Numbers mismatch. Please check") +} +######################################################################## +################################################### +# Lineage barplots # +################################################### + +#=============================== +# lineage sample and SNP count +#=============================== +lin_countP = lin_count_bp(lf_data = lineage_dfL[['lin_lf']] + , all_lineages = F + , x_categ = "sel_lineages" + , y_count = "p_count" + , use_lineages = c("L1", "L2", "L3", "L4") + , bar_fill_categ = "count_categ" + , display_label_col = "p_count" + , bar_stat_stype = "identity" + , d_lab_size = 8 + , d_lab_col = "black" + , my_xats = 25 # x axis text size + , my_yats = 25 # y axis text sized_lab_size + , my_xals = 25 # x axis label size + , my_yals = 25 # y axis label size + , my_lls = 25 # legend label size + , bar_col_labels = c("SNPs", "Total Samples") + , bar_col_values = c("grey50", "gray75") + , bar_leg_name = "" + , leg_location = "top" + , y_log10 = F + , y_scale_percent = FALSE + , y_label = c("Count") + ) + +#=============================== +# lineage SNP diversity count +#=============================== +lin_diversityP = lin_count_bp_diversity(lf_data = lineage_dfL[['lin_wf']] + , x_categ = "sel_lineages" + , y_count = "snp_diversity" + #, all_lineages = F + , use_lineages = c("L1", "L2", "L3", "L4") + , display_label_col = "snp_diversity_f" + , bar_stat_stype = "identity" + , x_lab_angle = 90 + , d_lab_size =9 + , my_xats = 25 # x axis text size + , my_yats = 25 # y axis text size + , my_xals = 25 # x axis label size + , my_yals = 25 # y axis label size + , my_lls = 25 # legend label size + , y_log10 = F + , y_scale_percent = F + , leg_location = "top" + , y_label = "Percent" #"SNP diversity" + , bp_plot_title = "nsSNP diversity" + , title_colour = "black" #"chocolate4" + , subtitle_text = NULL + , sts = 20 + , subtitle_colour = "#350E20FF") + +#============================================= +# Output plots: Lineage count and Diversity +#============================================= +# lineage_bp_CL = paste0(outdir_images +# ,tolower(gene) +# ,"_lineage_bp_CL_Tall.svg") +# +# cat("Lineage barplots:", lineage_bp_CL) +# svg(lineage_bp_CL, width = 8, height = 15) +# +# cowplot::plot_grid(lin_countP, lin_diversityP +# #, labels = c("(a)", "(b)", "(c)", "(d)") +# , nrow = 2 +# # , ncols = 2 +# , labels = "AUTO" +# , label_size = 25) +# +# dev.off() +######################################################################## + + +################################################### +# Stability dist # +################################################### +# scaled_cols_stability = c("duet_scaled" +# , "deepddg_scaled" +# , "ddg_dynamut2_scaled" +# , "foldx_scaled" +# , "avg_stability_scaled") + +my_xlabel = paste0("Average stability ", "(", stability_suffix, ")"); my_xlabel +#plotdf = merged_df2[merged_df2$lineage%in%c("L1", "L2", "L3", "L4"),] + +linP_dm_om = lineage_distP(merged_df2 + , with_facet = F + , x_axis = "avg_stability_scaled" + , y_axis = "lineage_labels" + , x_lab = my_xlabel + , use_lineages = c("L1", "L2", "L3", "L4") + #, fill_categ = "mutation_info_orig", fill_categ_cols = c("#E69F00", "#999999") + , fill_categ = "sensitivity" + , fill_categ_cols = c("red", "blue") + , label_categories = c("Resistant", "Sensitive") + , leg_label = "Mutation group" + , my_ats = 22 # axis text size + , my_als = 22 # axis label size + , my_leg_ts = 22 + , my_leg_title = 22 + , my_strip_ts = 22 + , alpha = 0.56 +) + +linP_dm_om + +################################################### +# Affinity dist [OPTIONAL] # +################################################### +# scaled_cols_affinity = c("affinity_scaled" +# , "mmcsm_lig_scaled" +# , "mcsm_ppi2_scaled" +# , "mcsm_na_scaled" +# , "avg_affinity_scaled") + +# lineage_distP(merged_df2 +# , with_facet = F +# , x_axis = "avg_affinity_scaled" +# , y_axis = "lineage_labels" +# , x_lab = my_xlabel +# , use_lineages = c("L1", "L2", "L3", "L4") +# #, fill_categ = "mutation_info_orig", fill_categ_cols = c("#E69F00", "#999999") +# , fill_categ = "sensitivity" +# , fill_categ_cols = c("red", "blue") +# , label_categories = c("Resistant", "Sensitive") +# , leg_label = "Mutation group" +# , my_ats = 22 # axis text size +# , my_als = 22 # axis label size +# , my_leg_ts = 22 +# , my_leg_title = 22 +# , my_strip_ts = 22 +# , alpha = 0.56 +# ) diff --git a/scripts/plotting/plotting_thesis/linage_bp_dist_layout.R b/scripts/plotting/plotting_thesis/linage_bp_dist_layout.R index 1b8c5ce..00f3adc 100644 --- a/scripts/plotting/plotting_thesis/linage_bp_dist_layout.R +++ b/scripts/plotting/plotting_thesis/linage_bp_dist_layout.R @@ -1,5 +1,10 @@ #!/usr/bin/env Rscript +########################################### +# TASK: generate plots for lineage +# Individual plots in +#lineage_bp_both.R +#linage_dist_ens_stability.R ########################################### my_label_size = 25 diff --git a/scripts/plotting/plotting_thesis/linage_dist_ens_stability.R b/scripts/plotting/plotting_thesis/linage_dist_ens_stability.R deleted file mode 100644 index 2071b59..0000000 --- a/scripts/plotting/plotting_thesis/linage_dist_ens_stability.R +++ /dev/null @@ -1,186 +0,0 @@ -#!/usr/bin/env Rscript - -######################################################### -# TASK: Lineage dist plots for stability: -# average the four tools - -# func from : lineage_dist.R -# plotdf -# , x_axis = "duet_scaled" -# , y_axis = "lineage_labels" -# , x_lab = "DUET" -# , all_lineages = F -# , use_lineages = c("L1", "L2", "L3", "L4") -# , with_facet = F -# , facet_wrap_var = "" # FIXME: document what this is for -# , fill_categ = "mutation_info_labels" -# , fill_categ_cols = c("#E69F00", "#999999") -# , my_ats = 15 # axis text size -# , my_als = 20 # axis label size -# , my_leg_ts = 16 -# , my_leg_title = 16 -# , my_strip_ts = 20 -# , leg_pos = c(0.8, 0.9) -# , leg_pos_wf = c("top", "left", "bottom", "right") -# , leg_dir_wf = c("horizontal", "vertical") -# , leg_label = "" -######################################################### - -#======= -# output -#======= -outdir_images = paste0("~/git/Writing/thesis/images/results/" - , tolower(gene), "/") -cat("plots will output to:", outdir_images) -######################################################### -#======= -# Data -#======= -df2 = merged_df2 - -#================================== -# PREFORMATTING: for consistency -# IMPORTANT for calculating effects -#================================== -head(df2$ddg_foldx) -df2['ddg_foldxC'] = abs(df2$ddg_foldx) -head(df2['ddg_foldxC']) - -# reverse signs for foldx scaled values for consistency with other tools -df2['foldx_scaled_signC'] = abs(df2$foldx_scaled) - -# remove the old ones from -rm_foldx_cols = c("ddg_foldx","foldx_scaled") -raw_cols_stab_revised = raw_cols_stability[!raw_cols_stability%in%rm_foldx_cols] -raw_cols_stab_revised = c(raw_cols_stab_revised,"ddg_foldxC") - -scaled_cols_stab_revised = scaled_cols_stability[!scaled_cols_stability%in%rm_foldx_cols] -scaled_cols_stab_revised = c(scaled_cols_stab_revised, "foldx_scaled_signC") - - -#================= -# PREFORMATTING: for consistency -#================= -cols_to_extract = colnames(df2)[colnames(df2)%in%c(common_cols - , outcome_cols_stability - , raw_cols_stability - , scaled_cols_stability - , raw_cols_stab_revised - , scaled_cols_stab_revised - , "lineage","lineage_labels")] - -df2_plot = df2[, cols_to_extract] - -all(table(df2_plot$lineage) == table(df2_plot$lineage_labels)) - -# find which stability cols to average: should contain revised foldx -if ("foldx_scaled_signC"%in%colnames(df2_plot)){ - cat("\nPASS: finding stability cols to average") - cols2avg_new = which(colnames(df2_plot)%in%scaled_cols_stab_revised) -}else{ - stop("\nAbort: Foldx column has opposing sign. Can't proceed to avergae.") -} - -# ensemble average across predictors -df2_plot['ens_stab_new'] = rowMeans(df2_plot[, cols2avg_new]) - -head(df2_plot$position); head(df2_plot$mutationinformation) -table(df2_plot['ens_stab_new']) - -# scaling average values -df2_plot["ens_stab_new_scaled"] = lapply(df2_plot["ens_stab_new"] - , function(x) { - scales::rescale_mid(x - , to = c(-1,1) - , from = c( min(df2_plot["ens_stab_new"]) - , max(df2_plot["ens_stab_new"])) - , mid = 0 - #, from = c(0,1)) - )}) - -min(df2_plot['ens_stab_new']); max(df2_plot['ens_stab_new']) -foo = df2_plot[c("cols2avg_new", "ens_stab_new_scaled"),] -min(df2_plot['ens_stab_new_scaled']); max(df2_plot['ens_stab_new_scaled']) - -########################################################### -nrow(df2_plot) -table(df2_plot$lineage) -table(df2_plot$lineage_labels) - - -#=============== -#Quick numbers checks -#=============== -nsample_lin = df2_plot[df2_plot$lineage%in%c("L1", "L2", "L3", "L4"),] - -if ( all(table(nsample_lin$sensitivity)== table(nsample_lin$mutation_info_labels)) ){ - cat("\nTotal no. of samples belonging to L1-l4 for", gene,":", nrow(nsample_lin) - , "\nCounting R and S samples") - if( sum(table(nsample_lin$sensitivity)) == nrow(nsample_lin) ){ - cat("\nPASSNumbers cross checked:") - print(table(nsample_lin$sensitivity)) - } -}else{ - stop("Abort: Numbers mismatch. Please check") -} - - -#==================== -# Output Lineage plot -#==================== - -my_xlabel = paste0("Average stability ", "(", stability_suffix, ")"); my_xlabel - -# linD_ens_stabP = paste0(outdir_images -# , tolower(gene) -# ,"_linD_ens_stabP.svg") -# -# cat("\nOutput plot:", linD_ens_stabP) -# svg(linD_ens_stabP, width = 10, height = 10) - -linP_dm_om = lineage_distP(df2_plot - , with_facet = F - , x_axis = "ens_stab_new_scaled" - , y_axis = "lineage_labels" - , x_lab = my_xlabel - , use_lineages = c("L1", "L2", "L3", "L4") - #, fill_categ = "mutation_info_orig", fill_categ_cols = c("#E69F00", "#999999") - , fill_categ = "sensitivity" - , fill_categ_cols = c("red", "blue") - , label_categories = c("Resistant", "Sensitive") - , leg_label = "Mutation group" - , my_ats = 22 # axis text size - , my_als = 22 # axis label size - , my_leg_ts = 22 - , my_leg_title = 22 - , my_strip_ts = 22 - , alpha = 0.56 -) - -linP_dm_om -#dev.off() - -########################################### -my_label_size = 25 - -linPlots_combined = paste0(outdir_images - , tolower(gene) - ,"_linP_combined.svg") - -cat("\nOutput plot:", linPlots_combined) -svg(linPlots_combined, width = 18, height = 12) - -cowplot::plot_grid( - cowplot::plot_grid(lin_countP, lin_diversityP - , nrow = 2 - # , ncols = 2 - , labels = "AUTO" - , label_size = my_label_size), - NULL, - linP_dm_om, - nrow = 1, - labels = c("", "", "C"), - label_size = my_label_size, - rel_widths = c(35, 3, 52) -) -dev.off() \ No newline at end of file diff --git a/scripts/plotting/plotting_thesis/lineage_bp_both.R b/scripts/plotting/plotting_thesis/lineage_bp_both.R deleted file mode 100644 index a723bfd..0000000 --- a/scripts/plotting/plotting_thesis/lineage_bp_both.R +++ /dev/null @@ -1,75 +0,0 @@ -# get plotting_dfs() -# using dfs [lf and wf] from lineage_dfL -# FIXME: add SNP diversity as the title -#=============================== -# lineage sample and SNP count -#=============================== -lin_countP = lin_count_bp(lf_data = lineage_dfL[['lin_lf']] - , all_lineages = F - , x_categ = "sel_lineages" - , y_count = "p_count" - , use_lineages = c("L1", "L2", "L3", "L4") - , bar_fill_categ = "count_categ" - , display_label_col = "p_count" - , bar_stat_stype = "identity" - , d_lab_size = 8 - , d_lab_col = "black" - , my_xats = 25 # x axis text size - , my_yats = 25 # y axis text sized_lab_size - , my_xals = 25 # x axis label size - , my_yals = 25 # y axis label size - , my_lls = 25 # legend label size - , bar_col_labels = c("SNPs", "Total Samples") - , bar_col_values = c("grey50", "gray75") - , bar_leg_name = "" - , leg_location = "top" - , y_log10 = F - , y_scale_percent = FALSE - , y_label = c("Count") - ) - -#=============================== -# lineage SNP diversity count -#=============================== -lin_diversityP = lin_count_bp_diversity(lf_data = lineage_dfL[['lin_wf']] - , x_categ = "sel_lineages" - , y_count = "snp_diversity" - #, all_lineages = F - , use_lineages = c("L1", "L2", "L3", "L4") - , display_label_col = "snp_diversity_f" - , bar_stat_stype = "identity" - , x_lab_angle = 90 - , d_lab_size =9 - , my_xats = 25 # x axis text size - , my_yats = 25 # y axis text size - , my_xals = 25 # x axis label size - , my_yals = 25 # y axis label size - , my_lls = 25 # legend label size - , y_log10 = F - , y_scale_percent = F - , leg_location = "top" - , y_label = "Percent" #"SNP diversity" - , bp_plot_title = "nsSNP diversity" - , title_colour = "black" #"chocolate4" - , subtitle_text = NULL - , sts = 20 - , subtitle_colour = "#350E20FF") - -#============================================= -# Output plots: Lineage count and Diversity -#============================================= -# lineage_bp_CL = paste0(outdir_images -# ,tolower(gene) -# ,"_lineage_bp_CL_Tall.svg") -# -# cat("Lineage barplots:", lineage_bp_CL) -# svg(lineage_bp_CL, width = 8, height = 15) -# -# cowplot::plot_grid(lin_countP, lin_diversityP -# #, labels = c("(a)", "(b)", "(c)", "(d)") -# , nrow = 2 -# # , ncols = 2 -# , labels = "AUTO" -# , label_size = 25) -# -# dev.off() \ No newline at end of file