From b7d50fbbcd15d0a78b6cfa04b27520759e9e8e47 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Thu, 9 Sep 2021 16:10:11 +0100 Subject: [PATCH] added lineage_labels and mutation_info_labels to combinig_dfs_plotting --- scripts/functions/combining_dfs_plotting.R | 34 ++++ scripts/functions/tests/test_lineage_dist.R | 3 +- .../lineage_basic_barplots_combined.R | 8 +- scripts/plotting/lineage_bp_data.R | 180 ------------------ 4 files changed, 38 insertions(+), 187 deletions(-) delete mode 100755 scripts/plotting/lineage_bp_data.R diff --git a/scripts/functions/combining_dfs_plotting.R b/scripts/functions/combining_dfs_plotting.R index 18e0374..848face 100644 --- a/scripts/functions/combining_dfs_plotting.R +++ b/scripts/functions/combining_dfs_plotting.R @@ -152,6 +152,40 @@ combining_dfs_plotting <- function( my_df_u unique(meta_muts_u[! meta_muts_u %in% merged_muts_u]) quit() } + + # Quick formatting: pretty labels + #----------------------- + # mutation_info_labels + #----------------------- + merged_df2$mutation_info_labels = ifelse(merged_df2$mutation_info == dr_muts_col + , "DM", "OM") + merged_df2$mutation_info_labels = factor(merged_df2$mutation_info_labels) + #----------------------- + # lineage labels + #----------------------- + merged_df2$lineage_labels = gsub("lineage", "L", merged_df2$lineage) + + merged_df2$lineage_labels = factor(merged_df2$lineage_labels, c("L1" + , "L2" + , "L3" + , "L4" + , "L5" + , "L6" + , "L7" + , "LBOV" + , "L1;L2" + , "L1;L3" + , "L1;L4" + , "L2;L3" + , "L2;L3;L4" + , "L2;L4" + , "L2;L6" + , "L2;LBOV" + , "L3;L4" + , "L4;L6" + , "L4;L7" + , "")) + #================================================================= # Merge 2: merged_df3 diff --git a/scripts/functions/tests/test_lineage_dist.R b/scripts/functions/tests/test_lineage_dist.R index 1f40d16..eeeebe5 100644 --- a/scripts/functions/tests/test_lineage_dist.R +++ b/scripts/functions/tests/test_lineage_dist.R @@ -16,12 +16,13 @@ cat("cols imported:" 
############################################################# - +# without facet lineage_distP(lin_dist_plot , with_facet = F , leg_label = "Mutation Class" ) +# with facet lineage_distP(lin_dist_plot , with_facet = T , facet_wrap_var = "mutation_info_labels" diff --git a/scripts/plotting/lineage_basic_barplots_combined.R b/scripts/plotting/lineage_basic_barplots_combined.R index b6f25e6..837e57b 100755 --- a/scripts/plotting/lineage_basic_barplots_combined.R +++ b/scripts/plotting/lineage_basic_barplots_combined.R @@ -55,9 +55,7 @@ plot_basic_bp_lineage_cl # Data: All lineages or # selected few #------------------------ -sel_lineages = levels(lin_lf$sel_lineages)[1:4] -sel_lineages -lin_lf_plot = lin_lf[lin_lf$sel_lineages%in%sel_lineages,] +lin_lf_plot = lin_lf[lin_lf$sel_lineages%in%c("L1", "L2", "L3", "L4"),] str(lin_lf_plot) # drop unused factor levels @@ -90,9 +88,7 @@ lin_countP # Data: All lineages or # selected few #------------------------ -sel_lineages = levels(lin_wf$sel_lineages)[1:4] -sel_lineages -lin_wf_plot = lin_wf[lin_wf$sel_lineages%in%sel_lineages,] +lin_wf_plot = lin_wf[lin_wf$sel_lineages%in%c("L1", "L2", "L3", "L4"),] str(lin_wf_plot) # drop unused factor levels diff --git a/scripts/plotting/lineage_bp_data.R b/scripts/plotting/lineage_bp_data.R deleted file mode 100755 index e9ab929..0000000 --- a/scripts/plotting/lineage_bp_data.R +++ /dev/null @@ -1,180 +0,0 @@ -#!/usr/bin/env Rscript -######################################################### -# TASK: Script to format data for lineage barplots: -# WF and LF data with lineage sample, and snp counts -# sourced by get_plotting_dfs.R -######################################################### -# working dir and loading libraries -# getwd() -# setwd("~/git/LSHTM_analysis/scripts/plotting") -# getwd() - -# make cmd -# globals -# drug = "streptomycin" -# gene = "gid" - -# source("get_plotting_dfs.R") -#======================================================================= 
-################################################# -# Get data with lineage count, and snp diversity -################################################# -table(merged_df2$lineage) - -if (table(merged_df2$lineage == "")[[2]]) { - -cat("\nMissing samples with lineage classification:", table(merged_df2$lineage == "")[[2]]) - -} - -# Add pretty lineage labels and mut_info_labels -class(merged_df2$lineage); table(merged_df2$lineage) -merged_df2$lineage_labels = gsub("lineage", "L", merged_df2$lineage) -table(merged_df2$lineage_labels) - -class(merged_df2$lineage_labels) - -merged_df2$lineage_labels = factor(merged_df2$lineage_labels, c("L1" - , "L2" - , "L3" - , "L4" - , "L5" - , "L6" - , "L7" - , "LBOV" - , "L1;L2" - , "L1;L3" - , "L1;L4" - , "L2;L3" - , "L2;L3;L4" - , "L2;L4" - , "L2;L6" - , "L2;LBOV" - , "L3;L4" - , "L4;L6" - , "L4;L7" - , "")) - -class(merged_df2$lineage_labels); nlevels(merged_df2$lineage_labels) - - -################################## -# WF data: lineages with -# snp count -# total_samples -# snp diversity (perc) -################################## -sel_lineages = levels(merged_df2$lineage_labels) - -lin_wf = data.frame(sel_lineages) #4, 1 -total_snps_u = NULL -total_samples = NULL - -for (i in sel_lineages){ - #print(i) - curr_total = length(unique(merged_df2$id)[merged_df2$lineage_labels==i]) - #print(curr_total) - total_samples = c(total_samples, curr_total) - print(total_samples) - - foo = merged_df2[merged_df2$lineage_labels==i,] - print(paste0(i, "=======\n")) - print(length(unique(foo$mutationinformation))) - curr_count = length(unique(foo$mutationinformation)) - - total_snps_u = c(total_snps_u, curr_count) -} -lin_wf - -# Add these counts as columns to the df -lin_wf$num_snps_u = total_snps_u -lin_wf$total_samples = total_samples - -# Add SNP diversity -lin_wf$snp_diversity = lin_wf$num_snps_u/lin_wf$total_samples -lin_wf - -#===================== -# Add some formatting -#===================== -# SNP diversity -lin_wf$snp_diversity_f = 
round( (lin_wf$snp_diversity * 100), digits = 0) -lin_wf$snp_diversity_f = paste0(lin_wf$snp_diversity_f, "%") - -# Important: Check factors so that x-axis categ appear as you want -lin_wf$sel_lineages = factor(lin_wf$sel_lineages, c("L1" - , "L2" - , "L3" - , "L4" - , "L5" - , "L6" - , "L7" - , "LBOV" - , "L1;L2" - , "L1;L3" - , "L1;L4" - , "L2;L3" - , "L2;L3;L4" - , "L2;L4" - , "L2;L6" - , "L2;LBOV" - , "L3;L4" - , "L4;L6" - , "L4;L7" - , "")) - -levels(lin_wf$sel_lineages) - -################################## -# LF data: lineages with -# snp count -# total_samples -# snp diversity (perc) -################################## -names(lin_wf) -tot_cols = ncol(lin_wf) -pivot_cols = c("sel_lineages", "snp_diversity", "snp_diversity_f") -pivot_cols_n = length(pivot_cols) - -expected_rows = nrow(lin_wf) * ( length(lin_wf) - pivot_cols_n ) - -lin_lf <- gather(lin_wf - , count_categ - , p_count - , num_snps_u:total_samples - , factor_key = TRUE) -lin_lf - -# quick checks -if ( nrow(lin_lf) == expected_rows ){ - cat("\nPASS: Lineage LF data created" - , "\nnrow: ", nrow(lin_lf) - , "\nncol: ", ncol(lin_lf)) -} else { - cat("\nFAIL: numbers mismatch" - , "\nExpected nrow: ", expected_rows) -} - -# Important: Relevel factors so that x-axis categ appear as you want -lin_lf$sel_lineages = factor(lin_lf$sel_lineages, c("L1" - , "L2" - , "L3" - , "L4" - , "L5" - , "L6" - , "L7" - , "LBOV" - , "L1;L2" - , "L1;L3" - , "L1;L4" - , "L2;L3" - , "L2;L3;L4" - , "L2;L4" - , "L2;L6" - , "L2;LBOV" - , "L3;L4" - , "L4;L6" - , "L4;L7" - , "")) - -levels(lin_lf$sel_lineages)