From 7dc72fb1180962217d390f783608ea39b3c1e189 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Tue, 7 Sep 2021 09:27:47 +0100 Subject: [PATCH] replaced old lineage barplot with count and diversity combined plots sourced from function --- scripts/functions/bp_lineage.R | 12 +- scripts/functions/test_bp_lineage.R | 107 +++------ scripts/plotting/basic_barplots_combined.R | 2 +- scripts/plotting/lineage_basic_barplot.R | 214 ------------------ .../lineage_basic_barplots_combined.R | 128 +++++++++++ scripts/plotting/lineage_bp_data.R | 112 ++++----- scripts/plotting/running_plotting_scripts.txt | 8 + 7 files changed, 217 insertions(+), 366 deletions(-) delete mode 100644 scripts/plotting/lineage_basic_barplot.R create mode 100644 scripts/plotting/lineage_basic_barplots_combined.R diff --git a/scripts/functions/bp_lineage.R b/scripts/functions/bp_lineage.R index 6b7090c..ad43386 100644 --- a/scripts/functions/bp_lineage.R +++ b/scripts/functions/bp_lineage.R @@ -20,8 +20,8 @@ lin_count_bp <- function( lf_data , my_xals = 22 # x axis label size , my_yals = 22 # y axis label size , my_lls = 22 # legend label size - , bar_col_labels = c("Mutations", "Total Samples") - , bar_col_values = c("grey50", "gray75") + , bar_col_labels = "" + , bar_col_values = "" , bar_leg_name = "" , leg_location = "top" , y_log10 = FALSE @@ -68,20 +68,18 @@ lin_count_bp <- function( lf_data , y = y_label , colour = "black") - if (y_log10){ OutPlot = OutPlot + scale_y_continuous(trans = "log10" , labels = trans_format("log10", math_format(10^.x) ) ) - - } + } if (y_scale_percent){ OutPlot = OutPlot + - #scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + - scale_y_continuous(labels = scales::percent) + + scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + + #scale_y_continuous(labels = scales::percent) + labs(title = "" , x = "" diff --git a/scripts/functions/test_bp_lineage.R b/scripts/functions/test_bp_lineage.R index 6742429..876f237 100644 --- a/scripts/functions/test_bp_lineage.R +++ b/scripts/functions/test_bp_lineage.R @@ -1,39 +1,25 @@ setwd("~/git/LSHTM_analysis/scripts/plotting") -#source ('get_plotting_dfs.R') +source ('get_plotting_dfs.R') source("../functions/bp_lineage.R") ######################################### # Lineage and SNP count: lineage lf data ######################################### -# Relevel factors so that x-axis categ appear as you want -lin_lf_plot = lin_lf -lin_lf_plot -is.factor(lin_lf_plot$sel_lineages_f) - -lin_lf_plot$sel_lineages_f = factor(lin_lf_plot$sel_lineages_f, c("" - , "L1" - , "L2" - , "L3" - , "L4" - , "L5" - , "L6" - , "L7" - , "LBOV" - , "L1;L2" - , "L1;L3" - , "L1;L4" - , "L2;L3" - , "L2;L3;L4" - , "L2;L4" - , "L2;L6" - , "L2;LBOV" - , "L3;L4" - , "L4;L6" - , "L4;L7")) +#========================= +# Data: All lineages or +# selected few +#========================= +sel_lineages = levels(lin_lf$sel_lineages_f) +sel_lineages +lin_lf_plot = lin_lf[lin_lf$sel_lineages_f%in%sel_lineages,] +# drop unused factor levels +lin_lf_plot$sel_lineages_f = factor(lin_lf_plot$sel_lineages_f) levels(lin_lf_plot$sel_lineages_f) - +#========================= +# Lineage count plot +#========================= lin_count_bp(lin_lf_plot , x_categ = "sel_lineages_f" , y_count = "p_count" @@ -44,68 +30,33 @@ lin_count_bp(lin_lf_plot , my_xats = 20 , bar_col_labels = c("Mutations", "Total Samples") , bar_col_values = c("grey50", "gray75") - , y_log10 = T - , y_label = "Count" - , y_scale_percent = F) + , y_scale_percent = F # T for diversity + , y_log10 = F + , y_label = "Count") ############################################### # Lineage SNP diversity count: lineage wf data ############################################### -# Relevel factors so that x-axis categ appear as you want -lin_wf_plot = lin_wf -is.factor(lin_wf_plot$sel_lineages_f) - -lin_wf_plot$sel_lineages_f = factor(lin_wf_plot$sel_lineages_f, c("" - , "L1" - , "L2" - , "L3" - , "L4" - , "L5" - , "L6" - , "L7" - , "LBOV" - , "L1;L2" - , "L1;L3" - , "L1;L4" - , "L2;L3" - , "L2;L3;L4" - , "L2;L4" - , "L2;L6" - , "L2;LBOV" - , "L3;L4" - , "L4;L6" - , "L4;L7")) +#========================= +# Data: All lineages or +# selected few +#========================= +sel_lineages = levels(lin_wf$sel_lineages_f) +sel_lineages +lin_wf_plot = lin_wf[lin_wf$sel_lineages_f%in%sel_lineages,] +# drop unused factor levels +lin_wf_plot$sel_lineages_f = factor(lin_wf_plot$sel_lineages_f) levels(lin_wf_plot$sel_lineages_f) - -#========== -# Plot -#========== +#========================= +# Lineage Diversity plot +#========================= lin_count_bp(lin_wf_plot , x_categ = "sel_lineages_f" , y_count = "snp_diversity" , display_label_col = "snp_diversity_f" - , bar_stat_stype = "identity" , x_lab_angle = 90 , my_xats = 20 , y_scale_percent = T - , y_label = "SNP diversity" - - -) - - - - -, x_categ = "sel_lineages_f" -, y_count = "p_count" -, bar_fill_categ = "count_categ" -, display_label_col = "p_count" -, bar_stat_stype = "identity" -, x_lab_angle = 90 -, my_xats = 15 -, bar_col_labels = c("Mutations", "Total Samples") -, bar_col_values = c("grey50", "gray75") -, y_log10 = T -, y_scale_percent = F \ No newline at end of file + , y_label = "SNP diversity") diff --git a/scripts/plotting/basic_barplots_combined.R b/scripts/plotting/basic_barplots_combined.R index 7fee2d7..2643b0d 100644 --- a/scripts/plotting/basic_barplots_combined.R +++ b/scripts/plotting/basic_barplots_combined.R @@ -23,7 +23,7 @@ plot_basic_bp_combined_labelled = paste0(plotdir,"/", basic_bp_combined_labell #======================================================================= #======= -# combin DUET and Ligand affinity plots +# combine DUET and Ligand affinity plots #======= svg(plot_basic_bp_combined_labelled , width = 12, height = 12 ) diff --git a/scripts/plotting/lineage_basic_barplot.R b/scripts/plotting/lineage_basic_barplot.R deleted file mode 100644 index e4503d1..0000000 --- a/scripts/plotting/lineage_basic_barplot.R +++ /dev/null @@ -1,214 +0,0 @@ -#!/usr/bin/env Rscript -getwd() -setwd("~/git/LSHTM_analysis/scripts/plotting/") -getwd() -######################################################### -# TASK: Basic lineage barplot showing numbers - -# Output: Basic barplot with lineage samples and mut count - -########################################################## -# Installing and loading required packages -########################################################## -source("Header_TT.R") -require(data.table) -source("combining_dfs_plotting.R") -# should return the following dfs, directories and variables - -# PS combined: -# 1) merged_df2 -# 2) merged_df2_comp -# 3) merged_df3 -# 4) merged_df3_comp - -# LIG combined: -# 5) merged_df2_lig -# 6) merged_df2_comp_lig -# 7) merged_df3_lig -# 8) merged_df3_comp_lig - -# 9) my_df_u -# 10) my_df_u_lig - -cat("Directories imported:" - , "\n====================" - , "\ndatadir:", datadir - , "\nindir:", indir - , "\noutdir:", outdir - , "\nplotdir:", plotdir) - -cat("Variables imported:" - , "\n=====================" - , "\ndrug:", drug - , "\ngene:", gene - , "\ngene_match:", gene_match - , "\nAngstrom symbol:", angstroms_symbol - , "\nNo. of duplicated muts:", dup_muts_nu - , "\nNA count for ORs:", na_count - , "\nNA count in df2:", na_count_df2 - , "\nNA count in df3:", na_count_df3 - , "\ndr_muts_col:", dr_muts_col - , "\nother_muts_col:", other_muts_col - , "\ndrtype_col:", resistance_col) - - -#=========== -# input -#=========== -# output of combining_dfs_plotting.R - -#======= -# output -#======= -# plot 1 -basic_bp_lineage = "basic_lineage_barplot.svg" -plot_basic_bp_lineage = paste0(plotdir,"/", basic_bp_lineage) - -#======================================================================= -#================ -# Data for plots: -# you need merged_df2, comprehensive one -# since this has one-many relationship -# i.e the same SNP can belong to multiple lineages -#================ -# REASSIGNMENT as necessary -my_df = merged_df2 - -# clear excess variable -rm(merged_df2_comp, merged_df3, merged_df3_comp) - -# quick checks -colnames(my_df) -str(my_df) - -# Ensure correct data type in columns to plot: need to be factor -is.factor(my_df$lineage) -my_df$lineage = as.factor(my_df$lineage) -is.factor(my_df$lineage) - -#========================== -# Plot: Lineage barplot -# x = lineage y = No. of samples -# col = Lineage -# fill = lineage -#============================ -table(my_df$lineage) -as.data.frame(table(my_df$lineage)) - -#============= -# Data for plots -#============= -# REASSIGNMENT -df <- my_df - -rm(my_df) - -# get freq count of positions so you can subset freq<1 -#setDT(df)[, lineage_count := .N, by = .(lineage)] - -#****************** -# generate plot: barplot of mutation by lineage -#****************** -sel_lineages = c("lineage1" - , "lineage2" - , "lineage3" - , "lineage4" - #, "lineage5" - #, "lineage6" - #, "lineage7" - ) - -df_lin = subset(df, subset = lineage %in% sel_lineages) - -# Create df with lineage inform & no. of unique mutations -# per lineage and total samples within lineage -# this is essentially barplot with two y axis - -bar = bar = as.data.frame(sel_lineages) #4, 1 -total_snps_u = NULL -total_samples = NULL - -for (i in sel_lineages){ - #print(i) - curr_total = length(unique(df$id)[df$lineage==i]) - total_samples = c(total_samples, curr_total) - print(total_samples) - - foo = df[df$lineage==i,] - print(paste0(i, "=======")) - print(length(unique(foo$mutationinformation))) - curr_count = length(unique(foo$mutationinformation)) - - total_snps_u = c(total_snps_u, curr_count) -} - -print(total_snps_u) -bar$num_snps_u = total_snps_u -bar$total_samples = total_samples -bar - -#***************** -# generate plot: lineage barplot with two y-axis -#https://stackoverflow.com/questions/13035295/overlay-bar-graphs-in-ggplot2 -#***************** - -y1 = bar$num_snps_u -y2 = bar$total_samples -x = sel_lineages - -to_plot = data.frame(x = x - , y1 = y1 - , y2 = y2) -to_plot - -# FIXME later: will be depricated! -melted = melt(to_plot, id = "x") -melted - - -svg(plot_basic_bp_lineage) - -my_ats = 20 # axis text size -my_als = 22 # axis label size - -g = ggplot(melted, aes(x = x - , y = value - , fill = variable)) - -printFile = g + geom_bar(stat = "identity" - , position = position_stack(reverse = TRUE) - , alpha=.75 - , colour='grey75') + - theme(axis.text.x = element_text(size = my_ats) - , axis.text.y = element_text(size = my_ats - #, angle = 30 - , hjust = 1 - , vjust = 0) - , axis.title.x = element_text(size = my_als - , colour = 'black') - , axis.title.y = element_text(size = my_als - , colour = 'black') - , legend.position = "top" - , legend.text = element_text(size = my_als)) + - #geom_text() + - geom_label(aes(label = value) - , size = 5 - , hjust = 0.5 - , vjust = 0.5 - , colour = 'black' - , show.legend = FALSE - #, check_overlap = TRUE - , position = position_stack(reverse = T)) + - labs(title = '' - , x = '' - , y = "Number" - , fill = 'Variable' - , colour = 'black') + - scale_fill_manual(values = c('grey50', 'gray75') - , name='' - , labels=c('Mutations', 'Total Samples')) + - scale_x_discrete(breaks = c('lineage1', 'lineage2', 'lineage3', 'lineage4') - , labels = c('Lineage 1', 'Lineage 2', 'Lineage 3', 'Lineage 4')) - -print(printFile) -dev.off() diff --git a/scripts/plotting/lineage_basic_barplots_combined.R b/scripts/plotting/lineage_basic_barplots_combined.R new file mode 100644 index 0000000..4b63587 --- /dev/null +++ b/scripts/plotting/lineage_basic_barplots_combined.R @@ -0,0 +1,128 @@ +#!/usr/bin/env Rscript +getwd() +setwd("~/git/LSHTM_analysis/scripts/plotting/") +getwd() +######################################################### +# TASK: Basic lineage barplot showing numbers + +# Output: Basic barplot with lineage samples and mut count + +########################################################## +# Installing and loading required packages +########################################################## +source("Header_TT.R") +source("../functions/bp_lineage.R") + +#=========== +# input +#=========== +source ('get_plotting_dfs.R') + +cat("Directories imported:" + , "\n====================" + , "\ndatadir:", datadir + , "\nindir:", indir + , "\noutdir:", outdir + , "\nplotdir:", plotdir) + +cat("Variables imported:" + , "\n=====================" + , "\ndrug:", drug + , "\ngene:", gene + , "\ngene_match:", gene_match + , "\nAngstrom symbol:", angstroms_symbol + #, "\nNo. of duplicated muts:", dup_muts_nu + , "\ndr_muts_col:", dr_muts_col + , "\nother_muts_col:", other_muts_col + , "\ndrtype_col:", resistance_col) + +#======= +# output +#======= +# plot 1 +basic_bp_lineage_cl = "basic_lineage_barplots_combined.svg" +plot_basic_bp_lineage_cl = paste0(plotdir,"/", basic_bp_lineage_cl) +plot_basic_bp_lineage_cl +################################################################# +#============================= +# PLOT 1: Lineage count plot: +# LF data +#============================= +#------------------------ +# Data: All lineages or +# selected few +#------------------------ +sel_lineages = levels(lin_lf$sel_lineages_f)[1:4] +sel_lineages +lin_lf_plot = lin_lf[lin_lf$sel_lineages_f%in%sel_lineages,] +str(lin_lf_plot) + +# drop unused factor levels +lin_lf_plot$sel_lineages_f = factor(lin_lf_plot$sel_lineages_f) +levels(lin_lf_plot$sel_lineages_f) +str(lin_lf_plot) + +#------------------------ +# plot from my function: +#------------------------ +lin_countP = lin_count_bp(lin_lf_plot + , x_categ = "sel_lineages_f" + , y_count = "p_count" + , bar_fill_categ = "count_categ" + , display_label_col = "p_count" + , bar_stat_stype = "identity" + , x_lab_angle = 90 + , my_xats = 20 + , bar_col_labels = c("Mutations", "Total Samples") + , bar_col_values = c("grey50", "gray75") + , y_scale_percent = F # T for diversity + , y_log10 = F + , y_label = "Count") +lin_countP +#================================ +# PLOT 2: Lineage Diversity plot +# WF data +#================================ +#------------------------ +# Data: All lineages or +# selected few +#------------------------ +sel_lineages = levels(lin_wf$sel_lineages_f)[1:4] +sel_lineages +lin_wf_plot = lin_wf[lin_wf$sel_lineages_f%in%sel_lineages,] +str(lin_wf_plot) + +# drop unused factor levels +lin_wf_plot$sel_lineages_f = factor(lin_wf_plot$sel_lineages_f) +levels(lin_wf_plot$sel_lineages_f) +str(lin_wf_plot) + +#------------------------ +# plot from my function: +#------------------------ +lin_diversityP = lin_count_bp(lin_wf_plot + , x_categ = "sel_lineages_f" + , y_count = "snp_diversity" + , display_label_col = "snp_diversity_f" + , bar_stat_stype = "identity" + , x_lab_angle = 90 + , my_xats = 20 + , y_scale_percent = T + , y_label = "SNP diversity") + +lin_diversityP +#########################################################################333 +#================================ +# Combine plots +#================================ + +svg(plot_basic_bp_lineage_cl , width = 8, height = 15 ) + +lineage_bp_combined = cowplot::plot_grid(lin_countP, lin_diversityP + #, labels = c("(a)", "(b)", "(c)", "(d)") + , nrow = 2 + , labels = "AUTO" + , label_size = 25) + +lineage_bp_combined +dev.off() diff --git a/scripts/plotting/lineage_bp_data.R b/scripts/plotting/lineage_bp_data.R index 2cdfbe8..2d51ab0 100644 --- a/scripts/plotting/lineage_bp_data.R +++ b/scripts/plotting/lineage_bp_data.R @@ -68,42 +68,35 @@ lin_wf #===================== # SNP diversity lin_wf$snp_diversity_f = round( (lin_wf$snp_diversity * 100), digits = 0) -lin_wf +lin_wf$snp_diversity_f = paste0(lin_wf$snp_diversity_f, "%") # Lineage names lin_wf$sel_lineages_f = gsub("lineage", "L", lin_wf$sel_lineages) lin_wf -# # Lineage names -# lin_wf = lin_wf %>% -# mutate(ordering_category = case_when( -# sel_lineages_f == "" ~ 0 -# , sel_lineages_f == "L1" ~ 1 -# , sel_lineages_f == "L2" ~ 2 -# , sel_lineages_f == "L3" ~ 3 -# , sel_lineages_f == "L4" ~ 4 -# , sel_lineages_f == "L5" ~ 5 -# , sel_lineages_f == "L6" ~ 6 -# , sel_lineages_f == "L7" ~ 7 -# , sel_lineages_f == "LBOV" ~ 8 -# -# , sel_lineages_f == "L1;L2" ~ 9 -# , sel_lineages_f == "L1;L3" ~ 10 -# , sel_lineages_f == "L1;L4" ~ 11 -# -# , sel_lineages_f == "L2;L3" ~ 12 -# , sel_lineages_f == "L2;L3;L4" ~ 13 -# , sel_lineages_f == "L2;L4" ~ 14 -# , sel_lineages_f == "L2;L6" ~ 15 -# , sel_lineages_f == "L2;LBOV" ~ 16 -# -# , sel_lineages_f == "L3;L4" ~ 17 -# -# , sel_lineages_f == "L4;L6" ~ 18 -# , sel_lineages_f == "L4;L7" ~ 19 -# -# , FALSE ~ -1) -# ) +# Important: Relevel factors so that x-axis categ appear as you want +lin_wf$sel_lineages_f = factor(lin_wf$sel_lineages_f, c("L1" + , "L2" + , "L3" + , "L4" + , "L5" + , "L6" + , "L7" + , "LBOV" + , "L1;L2" + , "L1;L3" + , "L1;L4" + , "L2;L3" + , "L2;L3;L4" + , "L2;L4" + , "L2;L6" + , "L2;LBOV" + , "L3;L4" + , "L4;L6" + , "L4;L7" + , "")) + +levels(lin_wf$sel_lineages_f) ################################## # LF data: lineages with @@ -134,40 +127,27 @@ if ( nrow(lin_lf) == expected_rows ){ cat("\nFAIL: numbers mismatch" , "\nExpected nrow: ", expected_rows) } -####################################### -# #===================== -# # Add some formatting -# #===================== -# lin_lf$sel_lineages_f = gsub("lineage", "L", lin_lf$sel_lineages) -# lin_lf +# Important: Relevel factors so that x-axis categ appear as you want +lin_lf$sel_lineages_f = factor(lin_lf$sel_lineages_f, c("L1" + , "L2" + , "L3" + , "L4" + , "L5" + , "L6" + , "L7" + , "LBOV" + , "L1;L2" + , "L1;L3" + , "L1;L4" + , "L2;L3" + , "L2;L3;L4" + , "L2;L4" + , "L2;L6" + , "L2;LBOV" + , "L3;L4" + , "L4;L6" + , "L4;L7" + , "")) -# lin_lf = lin_lf %>% -# mutate(ordering_category = case_when( -# sel_lineages_f == "" ~ 0 -# , sel_lineages_f == "L1" ~ 1 -# , sel_lineages_f == "L2" ~ 2 -# , sel_lineages_f == "L3" ~ 3 -# , sel_lineages_f == "L4" ~ 4 -# , sel_lineages_f == "L5" ~ 5 -# , sel_lineages_f == "L6" ~ 6 -# , sel_lineages_f == "L7" ~ 7 -# , sel_lineages_f == "LBOV" ~ 8 -# -# , sel_lineages_f == "L1;L2" ~ 9 -# , sel_lineages_f == "L1;L3" ~ 10 -# , sel_lineages_f == "L1;L4" ~ 11 -# -# , sel_lineages_f == "L2;L3" ~ 12 -# , sel_lineages_f == "L2;L3;L4" ~ 13 -# , sel_lineages_f == "L2;L4" ~ 14 -# , sel_lineages_f == "L2;L6" ~ 15 -# , sel_lineages_f == "L2;LBOV" ~ 16 -# -# , sel_lineages_f == "L3;L4" ~ 17 -# -# , sel_lineages_f == "L4;L6" ~ 18 -# , sel_lineages_f == "L4;L7" ~ 19 -# -# , FALSE ~ -1) -# ) +levels(lin_lf$sel_lineages_f) diff --git a/scripts/plotting/running_plotting_scripts.txt b/scripts/plotting/running_plotting_scripts.txt index 8d7ba0c..332293e 100644 --- a/scripts/plotting/running_plotting_scripts.txt +++ b/scripts/plotting/running_plotting_scripts.txt @@ -112,6 +112,14 @@ note: - fa flag has default if not supplied - fb flag has default if not supplied + +#=================== +# Add LINEAGE ONE +#=================== +# Lineage_bp.R +creates Count and Diversity plot + + ######################################################################## # TODO Delete: dirs.R