#!/usr/bin/env Rscript ######################################################### # TASK: producing barplots # basic barplots with count of mutations # basic barplots with frequency of count of mutations # Depends on ## plotting_globals.R (previously dir.R) ## plotting_data.R ######################################################### # working dir getwd() setwd("~/git/LSHTM_analysis/scripts/plotting") getwd() # load libraries #source("Header_TT.R") library(ggplot2) library(data.table) library(dplyr) require("getopt", quietly = TRUE) # cmd parse arguments # load functions source("plotting_globals.R") source("plotting_data.R") ######################################################### # command line args #******************** # !!!FUTURE TODO!!! # Can pass additional params of output/plot dir by user. # Not strictly required for my workflow since it is optimised # to have a streamlined input/output flow without filename worries. #******************** spec = matrix(c( "drug" ,"d", 1, "character", "gene" ,"g", 1, "character", "data" ,"f", 2, "character" ), byrow = TRUE, ncol = 4) opt = getopt(spec) #FIXME: detect if script running from cmd, then set these drug = opt$drug gene = opt$gene infile = opt$data # hardcoding when not using cmd #drug = "streptomycin" #gene = "gid" if(is.null(drug)|is.null(gene)) { stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)") } ######################################################### # call functions with relevant args #------------------------------------------ # import_dirs() # should return the follwoing variables: # datadir # indir # outdir # plotdir # dr_muts_col # other_muts_col # resistance_col #-------------------------------------------- import_dirs(drug, gene) #--------------------------------------------- # plotting_data() # should return the following dfs: # my_df # my_df_u # my_df_u_lig # dup_muts #---------------------------------------------- #infile = "/home/tanu/git/Data/streptomycin/output/gid_comb_stab_struc_params.csv" #infile = "" #if (!exists("infile") && exists("gene")){ if (!is.character(infile) && exists("gene")){ #in_filename_params = paste0(tolower(gene), "_all_params.csv") in_filename_params = paste0(tolower(gene), "_comb_stab_struc_params.csv") # part combined for gid infile = paste0(outdir, "/", in_filename_params) cat("\nInput file not specified, assuming filename: ", infile, "\n") } # Get the DFs out of plotting_data() pd_df = plotting_data(infile) my_df = pd_df[[1]] my_df_u = pd_df[[2]] my_df_u_lig = pd_df[[3]] dup_muts = pd_df[[4]] ######################################################### cat(paste0("Directories imported:" , "\ndatadir:", datadir , "\nindir:", indir , "\noutdir:", outdir , "\nplotdir:", plotdir)) cat(paste0("Variables imported:" , "\ndrug:", drug , "\ngene:", gene)) #, "\ngene_match:", gene_match #, "\nLength of upos:", length(upos) #, "\nAngstrom symbol:", angstroms_symbol)) #====================================================================== #======= # output #======= # plot 1 basic_bp_foldx = "basic_barplot_foldx.svg" plot_basic_bp_foldx = paste0(plotdir,"/", basic_bp_foldx) #======================================================================= #================ # Data for plots #================ # REASSIGNMENT as necessary df = my_df_u # sanity checks str(df) #======================================================================= #**************** # Plot 1: Count of stabilising and destabilsing muts #**************** svg(plot_basic_bp_foldx) print(paste0("plot1 filename:", plot_basic_bp_foldx)) my_ats = 25 # axis text size my_als = 22 # axis label size theme_set(theme_grey()) #-------------- # start plot 1 #-------------- g = ggplot(df, aes(x = foldx_outcome)) foldx_outcome_count = g + geom_bar(aes(fill = foldx_outcome) , show.legend = TRUE) + geom_label(stat = "count" , aes(label = ..count..) , color = "black" , show.legend = FALSE , size = 10) + theme(axis.text.x = element_blank() , axis.title.x = element_blank() , axis.title.y = element_text(size=my_als) , axis.text.y = element_text(size = my_ats) , legend.position = c(0.73,0.8) , legend.text = element_text(size=my_als-2) , legend.title = element_text(size=my_als) , plot.title = element_blank()) + labs(title = "" , y = "Number of nsSNPs" #, fill="FoldX Outcome" ) + scale_fill_discrete(name = "FoldX Outcome" , labels = c("Destabilising", "Stabilising")) print(foldx_outcome_count) dev.off() table(df$foldx_outcome) ######################################################################## # end of foldx barplot ########################################################################