tweaking basic bp to make generic

2021-06-04 17:23:41 +01:00 · 2021-06-04 17:23:41 +01:00 · d21605b31f
commit d21605b31f
parent 4f60e93abb
2 changed files with 55 additions and 73 deletions
--- a/scripts/plotting/basic_barplots_PS.R
+++ b/scripts/plotting/basic_barplots_PS.R
@ -14,7 +14,49 @@ getwd()
 library(ggplot2)
 library(data.table)
 library(dplyr)
+
+# Set globals: 
+source("plotting_globals.R")
+# pretend cli
+drug = "streptomycin"
+gene = "gid"
+infile = "merged_df3_short.csv"
+
+import_dirs(drug, gene)
+
 source("plotting_data.R")
+plotting_data("merged_df3_short.csv")
+
+if (!exists("infile") && exists("gene")){
+  #in_filename_params = paste0(tolower(gene), "_all_params.csv") 
+  in_filename_params = paste0(tolower(gene), "_comb_stab_struc_params.csv") # part combined for gid
+  infile = paste0(outdir, "/", in_filename_params)
+}
+
+plotting_data(infile)
+
+
+#=======================================================================
+# command line args
+spec = matrix(c(
+  "drug"   , "d", 1, "character",
+  "gene"   , "g", 1, "character"
+), byrow = TRUE, ncol = 4)
+
+opt = getopt(spec)
+
+#FIXME: detect if script running from cmd, then set these
+drug = opt$drug
+gene = opt$gene
+
+# hardcoding when not using cmd
+#drug = "streptomycin"
+#gene = "gid"
+
+if(is.null(drug)|is.null(gene)) {
+  stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)")
+}
+#=======================================================================

 # should return the following dfs, directories and variables
 # my_df
@ -31,7 +73,7 @@ cat(paste0("Directories imported:"
 cat(paste0("Variables imported:"
           , "\ndrug:", drug
           , "\ngene:", gene
-           , "\ngene_match:", gene_match
+           #, "\ngene_match:", gene_match
           , "\nLength of upos:", length(upos)
           , "\nAngstrom symbol:", angstroms_symbol))       
           
--- a/scripts/plotting/plotting_data.R
+++ b/scripts/plotting/plotting_data.R
@ -6,52 +6,19 @@
 #https://stackoverflow.com/questions/38851592/r-append-column-in-a-dataframe-with-frequency-count-based-on-two-columns
 #########################################################
 # working dir and loading libraries
-getwd()
-setwd("~/git/LSHTM_analysis/scripts/plotting")
-getwd()
-
-#source("Header_TT.R")
-library(ggplot2)
+#getwd()
+#setwd("~/git/LSHTM_analysis/scripts/plotting")
+#getwd()
 library(data.table)
 library(dplyr)
-require("getopt", quietly = TRUE) #cmd parse arguments
-#========================================================
-# command line args
-spec = matrix(c(
-  "drug"   , "d", 1, "character",
-  "gene"   , "g", 1, "character"
-), byrow = TRUE, ncol = 4)

-opt = getopt(spec)
+#=========================================================

-#FIXME: detect if script running from cmd, then set these
-#drug = opt$drug
-#gene = opt$gene
-
-# hardcoding when not using cmd
-drug = "streptomycin"
-gene = "gid"
-
-if(is.null(drug)|is.null(gene)) {
-  stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)")
-}
-#========================================================
-# Load functions
-# import dir structure
-source("dirs.R")
-import_dirs(drug, gene)
-#=======================================================
-
-#======
-# input
-#======
-#in_filename = "mcsm_complex1_normalised.csv"
-#in_filename_params = paste0(tolower(gene), "_all_params.csv") 
-in_filename_params = paste0(tolower(gene), "_comb_stab_struc_params.csv") # part combined
-infile_params = paste0(outdir, "/", in_filename_params)
-cat(paste0("Input file 1:", infile_params) )
+plotting_data <- function(infile_params) {
  
+cat(paste0("Input file 1:", infile_params, '\n') )

+# These globals are created by import_dirs()
 cat('columns based on variables:\n'
      , drug
      , '\n'
@ -66,7 +33,7 @@ cat('columns based on variables:\n'
 ###########################
 # Read file: struct params
 ###########################
-cat("Reading struct params including mcsm:", in_filename_params)
+#cat("Reading struct params including mcsm:", in_filename_params)
    
 my_df = read.csv(infile_params, header = T)

@ -146,41 +113,14 @@ cat("\nNo. of unique mutational positions:"); cat(length(upos), "\n")
 ###########################
 # extract mutations <10Angstroms and symbols
 ###########################
-table(my_df_u$ligand_distance<10)
+table(my_df_u$ligand_distance<mcsm_lig_cutoff)

-my_df_u_lig = my_df_u[my_df_u$ligand_distance <10,]
+my_df_u_lig = my_df_u[my_df_u$ligand_distance <mcsm_lig_cutoff,]

-
-#==================
-# Angstroms symbol
-#==================
-
-angstroms_symbol = "\u212b"
 cat(paste0("There are ", nrow(my_df_u_lig), " sites lying within 10", angstroms_symbol, " of the ligand\n"))

-#==================
-# Delta symbol
-#==================
-
-delta_symbol = "\u0394"; delta_symbol
-
-###########################
-# variables for my cols
-###########################
-
-mcsm_red2 =  "#ae301e" # most negative
-mcsm_red1 =  "#f8766d"
-
-mcsm_mid = "white" # middle
-
-mcsm_blue1 = "#00bfc4"
-mcsm_blue2 = "#007d85" # most positive
-
-
 ########################################################################
 #               end of data extraction and cleaning for plots          #
 ########################################################################
-# clear variables
-rm(opt, spec)
-

+}