#!/usr/bin/env Rscript ######################################################### # TASK: function for barplot showing no. of sites with nsSNP # count ######################################################### # load libraries and functions library(ggplot2) library(data.table) library(dplyr) theme_set(theme_grey()) #================================================================= # site_snp_count_bp(): barplots for no. of sites and nsSNP count # input args ## df containing data to plot ## df column name containing site/position numbers ## ...opt args # TODO: # legend params, currently it appears as title # visually might be nicer for it to be inside the plot #================================================================= site_snp_count_bp <- function (plotdf, df_colname = "position", #, bp_plot_title = "" #, leg_title = "Legend title" leg_text_size = 10,#20 axis_text_size = 10,#25 axis_label_size = 10,#22 subtitle_size = 10,#20 geom_ls = 10, xaxis_title = "Number of nsSNPs", yaxis_title = "Number of Sites", title_colour = "chocolate4", subtitle_text = NULL, subtitle_colour = "pink", ... ) { if (is.null(plotdf)){ return(ggplot() + annotate(x=1,y=1,"text", label="NO DATA")+theme_void()) } plotdf = as.data.frame(plotdf) # dim of plotdf cat(paste0("\noriginal df dimensions:" , "\nNo. of rows:", nrow(plotdf) , "\nNo. of cols:", ncol(plotdf) , "\nNow adding column: frequency of mutational positions")) #------------------------------------------- # adding column: snpcount for each position #------------------------------------------- #setDT(plotdf)[, position_count_check := .N, by = .(eval(parse(text = df_colname)))] # from dplyr plotdf = plotdf %>% dplyr::add_count(eval(parse(text = df_colname))) class(plotdf) plotdf = as.data.frame(plotdf) class(plotdf) nc_change = which(colnames(plotdf) == "n") colnames(plotdf)[nc_change] <- "position_count" class(plotdf) # if (all(plotdf$position_count==plotdf$position_count_check) ){ # cat("\nPASS: position_count column created") # plotdf = plotdf[, !colnames(plotdf)%in%c("position_count_check")] # }else{ # stop("\nAbort: pos count numbes mismatch from dplyr and data.table") # } cat("\nCumulative nssnp count\n" , table(plotdf$position_count)) # calculating total no. of mutations tot_muts = sum(table(plotdf$position_count)) # sanity check if(tot_muts == nrow(plotdf)){ cat("\nPASS: total number of mutations match" , "\nTotal no. of nsSNPs:", tot_muts) } else{ cat("\nWARNING: total no. of muts = ", tot_muts , "\nExpected = ", nrow(plotdf)) } # revised dimensions cat(paste0("\nrevised df dimensions:" , "\nNo. of rows:", nrow(plotdf) , "\nNo. of cols:", ncol(plotdf))) #------------------------------------------------------ # creating df: average count of snpcount for each position # created in earlier step #------------------------------------------------------- # use group by on position_count snpsBYpos_df <- plotdf %>% dplyr::group_by(eval(parse(text = df_colname))) %>% dplyr::summarise(snpsBYpos = mean(position_count)) # changed from summarize! cat("\nnssnp count per position\n" , table(snpsBYpos_df$snpsBYpos) , "\n") # calculating total no. of sites associated with nsSNPs tot_sites = sum(table(snpsBYpos_df$snpsBYpos)) # sanity check if(tot_sites == length(unique(plotdf$position))){ cat("\nPASS: total number of mutation sites match" , "\nTotal no. of sites with nsSNPs:", tot_sites) } else{ cat("WARNING: total no. of sites = ", tot_sites , "\nExpected = ", length(unique(plotdf$position))) } # FIXME: should really be legend title # but atm being using as plot title #my_leg_title bp_plot_title = paste0("Total nsSNPs: ", tot_muts , "\nTotal sites: ", tot_sites) #------------- # start plot 2 #-------------- # to make x axis display all positions # not sure if to use with sort or directly my_x = sort(unique(snpsBYpos_df$snpsBYpos)) ggplot(snpsBYpos_df, aes(x = snpsBYpos)) + geom_bar(aes (alpha = 0.5) , show.legend = FALSE) + scale_x_continuous(breaks = unique(snpsBYpos_df$snpsBYpos)) + geom_label(stat = "count", aes(label = ..count..) , color = "black" , size = geom_ls , position = position_dodge2(width = 1)) + theme(axis.text.x = element_text(size = axis_text_size , angle = 0) , axis.text.y = element_text(size = axis_text_size , angle = 0 , hjust = 1) , axis.title.x = element_text(size = axis_label_size) , axis.title.y = element_text(size = axis_label_size) #, legend.position = c(0.73,0.8) #, legend.text = element_text(size = leg_text_size) #, legend.title = element_text(size = axis_label_size) #, panel.grid.major = element_blank(), #, panel.grid.minor = element_blank(), , panel.grid = element_blank() , plot.title = element_text(size = leg_text_size , colour = title_colour , hjust = 0.5) , plot.subtitle = element_text(size = subtitle_size , hjust = 0.5 , colour = subtitle_colour)) + # labs(title = bp_plot_title # , subtitle = subtitle_text # , x = xaxis_title # , y = yaxis_title) labs(title = "" , subtitle = bp_plot_title , x = xaxis_title , y = yaxis_title) } ######################################################################## # end of function ########################################################################