From 5c018e23be07e5f11a7334d62bc6468458b08e1b Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Thu, 10 Jun 2021 14:46:11 +0100 Subject: [PATCH] added function for position_count_bp.R --- .../plotting/functions/position_count_bp.R | 125 ++++++++++++++++++ .../plotting/functions/stability_count_bp.R | 8 +- scripts/plotting/functions/test_functions.R | 56 +++++--- 3 files changed, 164 insertions(+), 25 deletions(-) create mode 100755 scripts/plotting/functions/position_count_bp.R diff --git a/scripts/plotting/functions/position_count_bp.R b/scripts/plotting/functions/position_count_bp.R new file mode 100755 index 0000000..071d1cc --- /dev/null +++ b/scripts/plotting/functions/position_count_bp.R @@ -0,0 +1,125 @@ +#!/usr/bin/env Rscript + +######################################################### +# TASK: function for barplot showing no. of sites with nsSNP +# count +######################################################### +# load libraries and functions +library(ggplot2) +library(data.table) +library(dplyr) + +theme_set(theme_grey()) +#================================================================= +# site_snp_count_bp(): barplots for no. of sites and nsSNP count +# input args +## df containing data to plot +## df column name containing site/position numbers +## ...opt args + +# TODO: +# legend params, currently it appears as title +# visually might be nicer for it to be inside the plot +#================================================================= + +site_snp_count_bp <- function (plotdf + , df_colname = "position" + #, bp_plot_title = "" + #, leg_title = "Legend title" + #, leg_text_size = 20 + , axis_text_size = 25 + , axis_label_size = 22 + , xaxis_title = "Number of nsSNPs" + , yaxis_title = "Number of Sites"){ + + # dim of plotdf + cat(paste0("\noriginal df dimensions:" + , "\nNo. of rows:", nrow(plotdf) + , "\nNo. of cols:", ncol(plotdf) + , "\nNow adding column: frequency of mutational positions")) + + # adding snpcount for each position + setDT(plotdf)[, pos_count := .N, by = .(eval(parse(text = df_colname)))] + + cat("\nCumulative nssnp count\n" + , table(plotdf$pos_count)) + + # calculating total no. of mutations + tot_muts = sum(table(plotdf$pos_count)) + + # sanity check + if(tot_muts == nrow(plotdf)){ + cat("\nPASS: total number of mutations match" + , "\nTotal no. of nsSNPs:", tot_muts) + } else{ + cat("\nWARNING: total no. of muts = ", tot_muts + , "\nExpected = ", nrow(plotdf)) + } + + # revised dimensions + cat(paste0("\nrevised df dimensions:" + , "\nNo. of rows:", nrow(plotdf) + , "\nNo. of cols:", ncol(plotdf))) + + # use group by on pos_count + snpsBYpos_df <- plotdf %>% + group_by(eval(parse(text = df_colname))) %>% + summarize(snpsBYpos = mean(pos_count)) + + cat("\nnssnp count\n" + , table(snpsBYpos_df$snpsBYpos)) + + # calculating total no. of sites associated with nsSNPs + tot_sites = sum(table(snpsBYpos_df$snpsBYpos)) + + # sanity check + if(tot_sites == length(unique(plotdf$position))){ + cat("\nPASS: total number of mutation sites match" + , "\nTotal no. of sites with nsSNPs:", tot_sites) + } else{ + cat("WARNING: total no. of sites = ", tot_sites + , "\nExpected = ", length(unique(plotdf$position))) + } + + # FIXME: should really be legend title + # but atm being using as plot title + my_leg_title = paste0("Total nsSNPs:", tot_muts + , ", Total no. of nsSNPs sites:", tot_sites) + bp_plot_title = my_leg_title + + #------------- + # start plot 2 + #-------------- + # to make x axis display all positions + # not sure if to use with sort or directly + my_x = sort(unique(snpsBYpos_df$snpsBYpos)) + + g = ggplot(snpsBYpos_df, aes(x = snpsBYpos)) + OutPlot_pos_count = g + geom_bar(aes (alpha = 0.5) + , show.legend = FALSE) + + scale_x_continuous(breaks = unique(snpsBYpos_df$snpsBYpos)) + + geom_label(stat = "count", aes(label = ..count..) + , color = "black" + , size = 10) + + theme(axis.text.x = element_text(size = axis_text_size + , angle = 0) + , axis.text.y = element_text(size = axis_text_size + , angle = 0 + , hjust = 1) + , axis.title.x = element_text(size = axis_label_size) + , axis.title.y = element_text(size = axis_label_size) + #, legend.position = c(0.73,0.8) + #, legend.text = element_text(size = leg_text_size) + #, legend.title = element_text(size = axis_label_size) + , plot.title = element_text(size = axis_label_size)) + + + labs(title = bp_plot_title + , x = xaxis_title + , y = yaxis_title) + + return(OutPlot_pos_count) +} + +######################################################################## +# end of function +######################################################################## \ No newline at end of file diff --git a/scripts/plotting/functions/stability_count_bp.R b/scripts/plotting/functions/stability_count_bp.R index 78adf30..0f54ec7 100644 --- a/scripts/plotting/functions/stability_count_bp.R +++ b/scripts/plotting/functions/stability_count_bp.R @@ -1,5 +1,5 @@ #!/usr/bin/env Rscript -# load libraries + ######################################################### # TASK: function for basic barplot returning stability counts ######################################################### @@ -7,7 +7,7 @@ library(ggplot2) theme_set(theme_grey()) #========================================================== -# my_stability_count(): basic barplots for stability counts +# stability_count_bp(): basic barplots for stability counts # input args ## df containing data to plot ## df column name containing stability outcome @@ -16,7 +16,7 @@ theme_set(theme_grey()) #========================================================== stability_count_bp <- function(plotdf , df_colname - , leg_title + , leg_title = "Legend title" , axis_text_size = 25 , axis_label_size = 22 , leg_text_size = 20 @@ -47,4 +47,4 @@ stability_count_bp <- function(plotdf } ############################################################# # end of function -############################################################# \ No newline at end of file +############################################################# diff --git a/scripts/plotting/functions/test_functions.R b/scripts/plotting/functions/test_functions.R index a056ebb..172083c 100644 --- a/scripts/plotting/functions/test_functions.R +++ b/scripts/plotting/functions/test_functions.R @@ -1,27 +1,41 @@ -#================= +setwd("~/git/LSHTM_analysis/scripts/plotting/functions") +getwd() + +# ================= # Test function -#================== -#source("../plotting_data.R") -#infile = "/home/tanu/git/Data/streptomycin/output/gid_comb_stab_struc_params.csv" -#pd_df = plotting_data(infile) -#my_df = pd_df[[1]] -#my_df_u = pd_df[[2]] -#my_df_u_lig = pd_df[[3]] -#dup_muts = pd_df[[4]] +# ================== +source("../plotting_data.R") +infile = "/home/tanu/git/Data/streptomycin/output/gid_comb_stab_struc_params.csv" +pd_df = plotting_data(infile) +my_df = pd_df[[1]] +my_df_u = pd_df[[2]] +my_df_u_lig = pd_df[[3]] +dup_muts = pd_df[[4]] -#------------------------------ +# ------------------------------ # barplot for mscm stability -#------------------------------ -#stability_count_bp(plotdf = my_df -# , df_colname = "duet_outcome" -# , leg_title = "DUET outcome") +# ------------------------------ +stability_count_bp(plotdf = my_df_u + , df_colname = "duet_outcome" + , leg_title = "DUET outcome") -#------------------------------ +# ------------------------------ # barplot for ligand affinity -#------------------------------ -#stability_count_bp(plotdf = my_df_u -# , df_colname = "ligand_outcome" -# , leg_title = "Ligand outcome" -# , bp_plot_title = "Sites < 10 Ang of ligand" -#) +# ------------------------------ +stability_count_bp(plotdf = my_df_u_lig + , df_colname = "ligand_outcome" + , leg_title = "Ligand outcome" + , bp_plot_title = "Sites < 10 Ang of ligand" +) +# ------------------------------ +# barplot for nssnp site count: all +# ------------------------------ +site_snp_count_bp(plotdf = my_df_u + , df_colname = "position") + +# ------------------------------ +# barplot for nssnp site count: within 10 Ang +# ------------------------------ +site_snp_count_bp(plotdf = my_df_u_lig + , df_colname = "position")