From ce8abafdfe70487a87e88ae674c514aabf4ef957 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Tue, 8 Jun 2021 16:48:19 +0100 Subject: [PATCH] wrapper script basic_barplots_PS.R now takes cmd args and calls functions to generate plots. Tested and verified. --- scripts/plotting/basic_barplots_PS.R | 39 ++++++++++++++++------------ scripts/plotting/plotting_data.R | 30 +++++++-------------- 2 files changed, 32 insertions(+), 37 deletions(-) diff --git a/scripts/plotting/basic_barplots_PS.R b/scripts/plotting/basic_barplots_PS.R index a184311..363e366 100755 --- a/scripts/plotting/basic_barplots_PS.R +++ b/scripts/plotting/basic_barplots_PS.R @@ -25,16 +25,24 @@ source("plotting_globals.R") source("plotting_data.R") ######################################################### # command line args +#******************** +# !!!FUTURE TODO!!! +# Can pass additional params of output/plot dir by user. +# Not strictly required for my workflow since it is optimised +# to have a streamlined input/output flow without filename worries. 
+#******************** spec = matrix(c( - "drug" , "d", 1, "character", - "gene" , "g", 1, "character" + "drug" ,"d", 1, "character", + "gene" ,"g", 1, "character", + "data" ,"f", 2, "character" ), byrow = TRUE, ncol = 4) opt = getopt(spec) #FIXME: detect if script running from cmd, then set these -drug = opt$drug -gene = opt$gene +drug = opt$drug +gene = opt$gene +infile = opt$data # hardcoding when not using cmd #drug = "streptomycin" @@ -45,14 +53,16 @@ if(is.null(drug)|is.null(gene)) { } ######################################################### # call functions with relevant args -drug = "streptomycin" -gene = "gid" +#drug = "streptomycin" +#gene = "gid" import_dirs(drug, gene) -if (!exists("infile") && exists("gene")){ - #in_filename_params = paste0(tolower(gene), "_all_params.csv") +#if (!exists("infile") && exists("gene")){ +if (!is.character(infile) && exists("gene")){ + #in_filename_params = paste0(tolower(gene), "_all_params.csv") in_filename_params = paste0(tolower(gene), "_comb_stab_struc_params.csv") # part combined for gid infile = paste0(outdir, "/", in_filename_params) + cat("\nInput file not specified, assuming filename: ", infile, "\n") } #infile = "/home/tanu/git/Data/streptomycin/output/gid_comb_stab_struc_params.csv" @@ -64,6 +74,7 @@ my_df = pd_df[[1]] my_df_u = pd_df[[2]] my_df_u_lig = pd_df[[3]] dup_muts = pd_df[[4]] + ######################################################### # This script: should return the following dfs, directories and variables # my_df @@ -79,15 +90,11 @@ cat(paste0("Directories imported:" cat(paste0("Variables imported:" , "\ndrug:", drug - , "\ngene:", gene + , "\ngene:", gene)) #, "\ngene_match:", gene_match - , "\nLength of upos:", length(upos) - , "\nAngstrom symbol:", angstroms_symbol)) - -# clear excess variable -rm(my_df, upos, dup_muts, my_df_u_lig) - -#======================================================================= + #, "\nLength of upos:", length(upos) + #, "\nAngstrom symbol:", angstroms_symbol)) + 
#======================================================================= #======= # output #======= diff --git a/scripts/plotting/plotting_data.R b/scripts/plotting/plotting_data.R index 87c4fd2..e5c0849 100755 --- a/scripts/plotting/plotting_data.R +++ b/scripts/plotting/plotting_data.R @@ -6,13 +6,12 @@ library(data.table) library(dplyr) ######################################################### -# FIXME (not urgent!): Dirty function return nothing, but creates global dfs + # plotting_data(): formatting data for plots # input args: ## input csv file ## lig cut off dist, default = 10 Ang -# output: None -# Side effects: global dfs (formatted and added columns) +# output: list of 4 dfs, that need to be decompressed ## my_df ## my_df_u ## my_df_u_lig @@ -24,18 +23,7 @@ my_df_u = data.frame() my_df_u_lig = data.frame() dup_muts = data.frame() -cat(paste0("Input file 1:", infile_params, '\n') ) - -# These globals are created by import_dirs() -#cat('columns based on variables:\n' -# , drug -# , '\n' -# , dr_muts_col -# , '\n' -# , other_muts_col -# , "\n" -# , resistance_col -# , '\n===============================================================') +cat(paste0("\nInput file to prepare for plotting:", infile_params, "\n") ) #=========================== # Read file: struct params @@ -73,7 +61,7 @@ if (my_min == -1 && my_max == 1){ , "\nProceeding with assigning foldx outcome category") }else{ cat("\nFAIL: could not scale foldx ddg values" - , "Aborting!") + , "Aborting!\n") } #------------------------------ @@ -87,7 +75,7 @@ c2 = table(my_df$ddg < 0) if ( all(c1 == c2) ){ cat("\nPASS: foldx outcome successfully created") }else{ - cat("\nFAIL: foldx outcome could not be created. Aborting!") + cat("\nFAIL: foldx outcome could not be created. 
Aborting!\n") exit() } @@ -98,21 +86,21 @@ if ( all(c1 == c2) ){ # check for duplicate mutations if ( length(unique(my_df$mutationinformation)) != length(my_df$mutationinformation)){ cat(paste0("\nCAUTION:", " Duplicate mutations identified" - , "\nExtracting these...")) + , "\nExtracting these...\n")) #cat(my_df[duplicated(my_df$mutationinformation),]) dup_muts = my_df[duplicated(my_df$mutationinformation),] dup_muts_nu = length(unique(dup_muts$mutationinformation)) cat(paste0("\nDim of duplicate mutation df:", nrow(dup_muts) , "\nNo. of unique duplicate mutations:", dup_muts_nu - , "\n\nExtracting df with unique mutations only")) + , "\n\nExtracting df with unique mutations only\n")) my_df_u = my_df[!duplicated(my_df$mutationinformation),] }else{ - cat(paste0("\nNo duplicate mutations detected")) + cat(paste0("\nNo duplicate mutations detected\n")) my_df_u = my_df } upos = unique(my_df_u$position) -cat("\nDim of clean df:"); cat(dim(my_df_u)) +cat("\nDim of clean df:"); cat(dim(my_df_u), "\n") cat("\nNo. of unique mutational positions:"); cat(length(upos), "\n") #===============================================