From d21605b31fd052e3a4028085d314c3be20858e66 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Fri, 4 Jun 2021 17:23:41 +0100 Subject: [PATCH] tweaking baic bp to make generic --- scripts/plotting/basic_barplots_PS.R | 44 ++++++++++++++- scripts/plotting/plotting_data.R | 84 ++++------------------------ 2 files changed, 55 insertions(+), 73 deletions(-) diff --git a/scripts/plotting/basic_barplots_PS.R b/scripts/plotting/basic_barplots_PS.R index aea1c21..0ee3f6c 100755 --- a/scripts/plotting/basic_barplots_PS.R +++ b/scripts/plotting/basic_barplots_PS.R @@ -14,7 +14,49 @@ getwd() library(ggplot2) library(data.table) library(dplyr) + +# Set globals: +source("plotting_globals.R") +# pretent cli +drug = "streptomycin" +gene = "gid" +infile = "merged_df3_short.csv" + +import_dirs(drug, gene) + source("plotting_data.R") +plotting_data("merged_df3_short.csv") + +if (!exists("infile") && exists("gene")){ + #in_filename_params = paste0(tolower(gene), "_all_params.csv") + in_filename_params = paste0(tolower(gene), "_comb_stab_struc_params.csv") # part combined for gid + infile = paste0(outdir, "/", in_filename_params) +} + +plotting_data(infile) + + +#======================================================================= +# command line args +spec = matrix(c( + "drug" , "d", 1, "character", + "gene" , "g", 1, "character" +), byrow = TRUE, ncol = 4) + +opt = getopt(spec) + +#FIXME: detect if script running from cmd, then set these +drug = opt$drug +gene = opt$gene + +# hardcoding when not using cmd +#drug = "streptomycin" +#gene = "gid" + +if(is.null(drug)|is.null(gene)) { + stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)") +} +#======================================================================= # should return the following dfs, directories and variables # my_df @@ -31,7 +73,7 @@ cat(paste0("Directories imported:" cat(paste0("Variables imported:" , "\ndrug:", drug , "\ngene:", gene - , "\ngene_match:", gene_match + #, "\ngene_match:", gene_match , "\nLength of upos:", length(upos) , "\nAngstrom symbol:", angstroms_symbol)) diff --git a/scripts/plotting/plotting_data.R b/scripts/plotting/plotting_data.R index d7a0ec7..04b1de8 100755 --- a/scripts/plotting/plotting_data.R +++ b/scripts/plotting/plotting_data.R @@ -6,52 +6,19 @@ #https://stackoverflow.com/questions/38851592/r-append-column-in-a-dataframe-with-frequency-count-based-on-two-columns ######################################################### # working dir and loading libraries -getwd() -setwd("~/git/LSHTM_analysis/scripts/plotting") -getwd() - -#source("Header_TT.R") -library(ggplot2) +#getwd() +#setwd("~/git/LSHTM_analysis/scripts/plotting") +#getwd() library(data.table) library(dplyr) -require("getopt", quietly = TRUE) #cmd parse arguments -#======================================================== -# command line args -spec = matrix(c( - "drug" , "d", 1, "character", - "gene" , "g", 1, "character" -), byrow = TRUE, ncol = 4) -opt = getopt(spec) - -#FIXME: detect if script running from cmd, then set these -#drug = opt$drug -#gene = opt$gene - -# hardcoding when not using cmd -drug = "streptomycin" -gene = "gid" - -if(is.null(drug)|is.null(gene)) { - stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)") -} -#======================================================== -# Load functions -# import dir structure -source("dirs.R") -import_dirs(drug, gene) -#======================================================= - -#====== -# input -#====== -#in_filename = "mcsm_complex1_normalised.csv" -#in_filename_params = paste0(tolower(gene), "_all_params.csv") -in_filename_params = paste0(tolower(gene), "_comb_stab_struc_params.csv") # part combined -infile_params = paste0(outdir, "/", in_filename_params) -cat(paste0("Input file 1:", infile_params) ) +#========================================================= +plotting_data <- function(infile_params) { + +cat(paste0("Input file 1:", infile_params, '\n') ) +# These globals are created by import_dirs() cat('columns based on variables:\n' , drug , '\n' @@ -66,7 +33,7 @@ cat('columns based on variables:\n' ########################### # Read file: struct params ########################### -cat("Reading struct params including mcsm:", in_filename_params) +#cat("Reading struct params including mcsm:", in_filename_params) my_df = read.csv(infile_params, header = T) @@ -146,41 +113,14 @@ cat("\nNo. of unique mutational positions:"); cat(length(upos), "\n") ########################### # extract mutations <10Angstroms and symbols ########################### -table(my_df_u$ligand_distance<10) +table(my_df_u$ligand_distance