#!/usr/bin/env Rscript ######################################################### # TASK: function for barplot showing no. of sites with nsSNP # count ######################################################### # load libraries and functions library(ggplot2) library(data.table) library(dplyr) theme_set(theme_grey()) #================================================================= # site_snp_count_bp(): barplots for no. of sites and nsSNP count # input args ## df containing data to plot ## df column name containing site/position numbers ## ...opt args # TODO: # legend params, currently it appears as title # visually might be nicer for it to be inside the plot #================================================================= site_snp_count_bp <- function (plotdf , df_colname = "position" #, bp_plot_title = "" #, leg_title = "Legend title" , leg_text_size = 10#20 , axis_text_size = 10#25 , axis_label_size = 10#22 , subtitle_size = 10#20 , geom_ls = 10 , xaxis_title = "Number of nsSNPs" , yaxis_title = "Number of Sites" , title_colour = "chocolate4" , subtitle_text = NULL , subtitle_colour = "pink") { plotdf = as.data.frame(plotdf) # dim of plotdf cat(paste0("\noriginal df dimensions:" , "\nNo. of rows:", nrow(plotdf) , "\nNo. of cols:", ncol(plotdf) , "\nNow adding column: frequency of mutational positions")) #------------------------------------------- # adding column: snpcount for each position #------------------------------------------- #setDT(plotdf)[, pos_count_check := .N, by = .(eval(parse(text = df_colname)))] # from dplyr plotdf = plotdf %>% dplyr::add_count(eval(parse(text = df_colname))) class(plotdf) plotdf = as.data.frame(plotdf) class(plotdf) nc_change = which(colnames(plotdf) == "n") colnames(plotdf)[nc_change] <- "pos_count" class(plotdf) # if (all(plotdf$pos_count==plotdf$pos_count_check) ){ # cat("\nPASS: pos_count column created") # plotdf = plotdf[, !colnames(plotdf)%in%c("pos_count_check")] # }else{ # stop("\nAbort: pos count numbes mismatch from dplyr and data.table") # } cat("\nCumulative nssnp count\n" , table(plotdf$pos_count)) # calculating total no. of mutations tot_muts = sum(table(plotdf$pos_count)) # sanity check if(tot_muts == nrow(plotdf)){ cat("\nPASS: total number of mutations match" , "\nTotal no. of nsSNPs:", tot_muts) } else{ cat("\nWARNING: total no. of muts = ", tot_muts , "\nExpected = ", nrow(plotdf)) } # revised dimensions cat(paste0("\nrevised df dimensions:" , "\nNo. of rows:", nrow(plotdf) , "\nNo. of cols:", ncol(plotdf))) #------------------------------------------------------ # creating df: average count of snpcount for each position # created in earlier step #------------------------------------------------------- # use group by on pos_count snpsBYpos_df <- plotdf %>% dplyr::group_by(eval(parse(text = df_colname))) %>% dplyr::summarise(snpsBYpos = mean(pos_count)) # changed from summarize! cat("\nnssnp count per position\n" , table(snpsBYpos_df$snpsBYpos) , "\n") # calculating total no. of sites associated with nsSNPs tot_sites = sum(table(snpsBYpos_df$snpsBYpos)) # sanity check if(tot_sites == length(unique(plotdf$position))){ cat("\nPASS: total number of mutation sites match" , "\nTotal no. of sites with nsSNPs:", tot_sites) } else{ cat("WARNING: total no. of sites = ", tot_sites , "\nExpected = ", length(unique(plotdf$position))) } # FIXME: should really be legend title # but atm being using as plot title #my_leg_title bp_plot_title = paste0("Total nsSNPs: ", tot_muts , "\nTotal sites: ", tot_sites) #------------- # start plot 2 #-------------- # to make x axis display all positions # not sure if to use with sort or directly my_x = sort(unique(snpsBYpos_df$snpsBYpos)) ggplot(snpsBYpos_df, aes(x = snpsBYpos)) + geom_bar(aes (alpha = 0.5) , show.legend = FALSE) + scale_x_continuous(breaks = unique(snpsBYpos_df$snpsBYpos)) + geom_label(stat = "count", aes(label = ..count..) , color = "black" , size = geom_ls) + theme(axis.text.x = element_text(size = axis_text_size , angle = 0) , axis.text.y = element_text(size = axis_text_size , angle = 0 , hjust = 1) , axis.title.x = element_text(size = axis_label_size) , axis.title.y = element_text(size = axis_label_size) #, legend.position = c(0.73,0.8) #, legend.text = element_text(size = leg_text_size) #, legend.title = element_text(size = axis_label_size) #, panel.grid.major = element_blank(), #, panel.grid.minor = element_blank(), , panel.grid = element_blank() , plot.title = element_text(size = leg_text_size , colour = title_colour , hjust = 0.5) , plot.subtitle = element_text(size = subtitle_size , hjust = 0.5 , colour = subtitle_colour)) + # labs(title = bp_plot_title # , subtitle = subtitle_text # , x = xaxis_title # , y = yaxis_title) labs(title = "" , subtitle = bp_plot_title , x = xaxis_title , y = yaxis_title) } ######################################################################## # end of function ########################################################################