saving work on logo plots before finishing

2021-06-23 16:49:18 +01:00 · 2021-06-23 16:49:18 +01:00 · 48a85ede0c
commit 48a85ede0c
parent 9d964e84b6
2 changed files with 38 additions and 18 deletions
--- a/scripts/plotting/logo_multiple_muts.R
+++ b/scripts/plotting/logo_multiple_muts.R
@ -16,22 +16,45 @@ source("../functions/combining_dfs_plotting.R")
 ###########################################################
 # command line args
 #********************
-drug = 'streptomycin'
-gene = 'gid'
+#drug = 'streptomycin'
+#gene = 'gid'
+#********************
+# !!!FUTURE TODO!!!
+# The output/plot directories could also be accepted as user-supplied params.
+# Not strictly required for my workflow, since it is optimised
+# to have a streamlined input/output flow without filename worries.
+#********************
+spec = matrix(c(  # getopt spec: long flag, short flag (single char), arg mask, type
+  "drug"       , "d",  1, "character",  # mask 1: value required when flag is given
+  "gene"       , "g",  1, "character",
+  "data_file1" , "a",  2, "character",  # mask 2: value optional; read as infile_params
+  "data_file2" , "b",  2, "character"   # mask 2: value optional; read as infile_metadata
+), byrow = TRUE, ncol = 4)

+opt = getopt(spec)
+
+#FIXME: detect whether the script is being run from the command line, and only then set these
+drug            = opt$drug
+gene            = opt$gene
+infile_params   = opt$data_file1
+infile_metadata = opt$data_file2
+
+if(is.null(drug)|is.null(gene)) {
+  stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)")
+}
 #===========
 # input
 #===========
 #---------------------
 # call: import_dirs()
 #---------------------
-import_dirs(drug, gene)
+import_dirs(drug_name = drug, gene_name = gene)

 #---------------------------
 # call: plotting_data()
 #---------------------------
-if (!exists("infile_params") && exists("gene")){
-  #if (!is.character(infile_params) && exists("gene")){
+#if (!exists("infile_params") && exists("gene")){
+if (!is.character(infile_params) && exists("gene")){ # when running as cmd
  #in_filename_params = paste0(tolower(gene), "_all_params.csv") 
  in_filename_params = paste0(tolower(gene), "_comb_afor.csv") # part combined for gid
  infile_params = paste0(outdir, "/", in_filename_params)
@ -39,21 +62,23 @@ if (!exists("infile_params") && exists("gene")){
 }

 # Input 1: read <gene>_comb_afor.csv
-pd_df = plotting_data(infile_params)
+cat("\nReading mcsm combined data file: ", infile_params)
+mcsm_df = read.csv(infile_params, header = T)
+pd_df = plotting_data(mcsm_df)
 my_df_u       = pd_df[[1]] # this forms one of the input for combining_dfs_plotting()

 #--------------------------------
 # call: combining_dfs_plotting()
 #--------------------------------
-if (!exists("infile_metadata") && exists("gene")){
-  #if (!is.character(infile_params) && exists("gene")){{
+#if (!exists("infile_metadata") && exists("gene")){
+if (!is.character(infile_metadata) && exists("gene")){ # when running as cmd
  in_filename_metadata = paste0(tolower(gene), "_metadata.csv") # part combined for gid
  infile_metadata = paste0(outdir, "/", in_filename_metadata)
  cat("\nInput file for gene metadata not specified, assuming filename: ", infile_metadata, "\n")
 }

 # Input 2: read <gene>_meta data.csv
-cat("\nReading meta data file:", infile_metadata)
+cat("\nReading meta data file: ", infile_metadata)

 gene_metadata <- read.csv(infile_metadata
                          , stringsAsFactors = F
@ -64,12 +89,7 @@ all_plot_dfs = combining_dfs_plotting(my_df_u
                                      , lig_dist_colname = 'ligand_distance'
                                      , lig_dist_cutoff = 10)

-#merged_df2        = all_plot_dfs[[1]]
-merged_df3        = all_plot_dfs[[2]]
-#merged_df2_comp   = all_plot_dfs[[3]]
-#merged_df3_comp   = all_plot_dfs[[4]]
-#merged_df2_lig    = all_plot_dfs[[5]]
-#merged_df3_lig    = all_plot_dfs[[6]]
+merged_df3 = all_plot_dfs[[2]]

 #===========
 # output
@ -93,7 +113,7 @@ c1 = unique(my_df$position)
 nrow(my_df) 

 # get freq count of positions so you can subset freq<1
-#require(data.table)
+require(data.table)
 setDT(my_df)[, mut_pos_occurrence := .N, by = .(position)] #189, 36

 table(my_df$position)