changes made to combining_dfs_plotting.R

2021-06-23 16:15:15 +01:00 · 2021-06-23 16:15:15 +01:00 · 2aec79af31
commit 2aec79af31
parent c6d1260f74
9 changed files with 258 additions and 126 deletions
--- a/scripts/plotting/Header_TT.R
+++ b/scripts/plotting/Header_TT.R
@ -7,6 +7,7 @@
 #  install.packages("gplots", dependencies = TRUE)
 #  library(gplots)
 #}
+require("getopt", quietly = TRUE) # cmd parse arguments

 if (!require("tidyverse")) {
  install.packages("tidyverse", dependencies = TRUE)
--- a/scripts/plotting/logo_combined.R
+++ b/scripts/plotting/logo_combined.R
@ -10,15 +10,66 @@ setwd("~/git/LSHTM_analysis/scripts/plotting")
 getwd()

 source("Header_TT.R")
-#library(ggplot2)
-#library(data.table)
-#library(dplyr)
+source("../functions/plotting_globals.R")
+source("../functions/plotting_data.R")
+source("../functions/combining_dfs_plotting.R")
+###########################################################
+# command line args
+#********************
+drug = 'streptomycin'
+gene = 'gid'

 #===========
 # input
 #===========
-source("combining_dfs_plotting.R")
+#---------------------
+# call: import_dirs()
+#---------------------
+import_dirs(drug, gene)

+#---------------------------
+# call: plotting_data()
+#---------------------------
+if (!exists("infile_params") && exists("gene")){
+  #if (!is.character(infile_params) && exists("gene")){
+  #in_filename_params = paste0(tolower(gene), "_all_params.csv") 
+  in_filename_params = paste0(tolower(gene), "_comb_afor.csv") # part combined for gid
+  infile_params = paste0(outdir, "/", in_filename_params)
+  cat("\nInput file for mcsm comb data not specified, assuming filename: ", infile_params, "\n")
+}
+
+# Input 1: read <gene>_comb_afor.csv
+pd_df = plotting_data(infile_params)
+my_df_u       = pd_df[[1]] # this forms one of the input for combining_dfs_plotting()
+
+#--------------------------------
+# call: combining_dfs_plotting()
+#--------------------------------
+if (!exists("infile_metadata") && exists("gene")){
+  #if (!is.character(infile_params) && exists("gene")){{
+  in_filename_metadata = paste0(tolower(gene), "_metadata.csv") # part combined for gid
+  infile_metadata = paste0(outdir, "/", in_filename_metadata)
+  cat("\nInput file for gene metadata not specified, assuming filename: ", infile_metadata, "\n")
+}
+
+# Input 2: read <gene>_meta data.csv
+cat("\nReading meta data file:", infile_metadata)
+
+gene_metadata <- read.csv(infile_metadata
+                          , stringsAsFactors = F
+                          , header = T)
+
+all_plot_dfs = combining_dfs_plotting(my_df_u
+                                      , gene_metadata
+                                      , lig_dist_colname = 'ligand_distance'
+                                      , lig_dist_cutoff = 10)
+
+#merged_df2        = all_plot_dfs[[1]]
+merged_df3        = all_plot_dfs[[2]]
+#merged_df2_comp   = all_plot_dfs[[3]]
+#merged_df3_comp   = all_plot_dfs[[4]]
+#merged_df2_lig    = all_plot_dfs[[5]]
+#merged_df3_lig    = all_plot_dfs[[6]]
 #===========
 # output
 #===========
@ -32,12 +83,6 @@ plot_logo_combined_labelled  = paste0(plotdir,"/", logo_combined_labelled)
 # REASSIGNMENT
 my_df = merged_df3 
 #%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-# clear excess variables
-rm(merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig
-   , merged_df3_comp, merged_df3_comp_lig
-   , my_df_u, my_df_u_lig, merged_df3_lig)
-
 colnames(my_df)
 str(my_df)

@ -166,8 +211,8 @@ p3 = p2 +
  theme(legend.position = "bottom"
        #, legend.title = element_blank()
        , legend.title = element_text("Amino acid properties", size = 20)
-        , legend.text = element_text( size = 20)
-        , axis.text.x = element_text(size = 20, angle = 90)
+        , legend.text = element_text(size = 20)
+        , axis.text.x = element_text(size = 16, angle = 90)
        , axis.text.y = element_blank()
        , axis.title.y = element_blank()
        , axis.title.x = element_text(size = 22))+
@ -237,7 +282,7 @@ position_or = as.numeric(colnames(wide_df_or))
 #============================================  
 # custom height (OR) logo plot: yayy works
 logo_or = ggseqlogo(wide_df_or, method="custom", seq_type="aa") + ylab("my custom height") +
-  theme(axis.text.x = element_text(size = 16
+  theme(axis.text.x = element_text(size = 15
                                    , angle = 90
                                    , hjust = 1
                                    , vjust = 0.4)
--- a/scripts/plotting/logo_multiple_muts.R
+++ b/scripts/plotting/logo_multiple_muts.R
@ -10,14 +10,66 @@ setwd("~/git/LSHTM_analysis/scripts/plotting")
 getwd()

 source("Header_TT.R")
-#library(ggplot2)
-#library(data.table)
-#library(dplyr)
+source("../functions/plotting_globals.R")
+source("../functions/plotting_data.R")
+source("../functions/combining_dfs_plotting.R")
+###########################################################
+# command line args
+#********************
+drug = 'streptomycin'
+gene = 'gid'

 #===========
 # input
 #===========
-source("combining_dfs_plotting.R")
+#---------------------
+# call: import_dirs()
+#---------------------
+import_dirs(drug, gene)
+
+#---------------------------
+# call: plotting_data()
+#---------------------------
+if (!exists("infile_params") && exists("gene")){
+  #if (!is.character(infile_params) && exists("gene")){
+  #in_filename_params = paste0(tolower(gene), "_all_params.csv") 
+  in_filename_params = paste0(tolower(gene), "_comb_afor.csv") # part combined for gid
+  infile_params = paste0(outdir, "/", in_filename_params)
+  cat("\nInput file for mcsm comb data not specified, assuming filename: ", infile_params, "\n")
+}
+
+# Input 1: read <gene>_comb_afor.csv
+pd_df = plotting_data(infile_params)
+my_df_u       = pd_df[[1]] # this forms one of the input for combining_dfs_plotting()
+
+#--------------------------------
+# call: combining_dfs_plotting()
+#--------------------------------
+if (!exists("infile_metadata") && exists("gene")){
+  #if (!is.character(infile_params) && exists("gene")){{
+  in_filename_metadata = paste0(tolower(gene), "_metadata.csv") # part combined for gid
+  infile_metadata = paste0(outdir, "/", in_filename_metadata)
+  cat("\nInput file for gene metadata not specified, assuming filename: ", infile_metadata, "\n")
+}
+
+# Input 2: read <gene>_meta data.csv
+cat("\nReading meta data file:", infile_metadata)
+
+gene_metadata <- read.csv(infile_metadata
+                          , stringsAsFactors = F
+                          , header = T)
+
+all_plot_dfs = combining_dfs_plotting(my_df_u
+                                      , gene_metadata
+                                      , lig_dist_colname = 'ligand_distance'
+                                      , lig_dist_cutoff = 10)
+
+#merged_df2        = all_plot_dfs[[1]]
+merged_df3        = all_plot_dfs[[2]]
+#merged_df2_comp   = all_plot_dfs[[3]]
+#merged_df3_comp   = all_plot_dfs[[4]]
+#merged_df2_lig    = all_plot_dfs[[5]]
+#merged_df3_lig    = all_plot_dfs[[6]]

 #===========
 # output
@ -32,12 +84,6 @@ plot_logo_multiple_muts = paste0(plotdir,"/", logo_multiple_muts)
 # REASSIGNMENT
 my_df = merged_df3 
 #%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-# clear excess variables
-rm(merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig
-   , merged_df3_comp, merged_df3_comp_lig
-   , my_df_u, my_df_u_lig, merged_df3_lig)
-
 colnames(my_df)
 str(my_df)

@ -117,7 +163,7 @@ p0
 p1 = p0 + theme(legend.position = "none"
                , legend.title = element_blank()
                , legend.text = element_text(size = 20)
-                , axis.text.x = element_text(size = 20, angle = 90)
+                , axis.text.x = element_text(size = 17, angle = 90)
                , axis.text.y = element_blank())
 p1

@ -138,7 +184,6 @@ rownames(tab_wt)

 #**************
 # Plot 2: wild_type logo
-
 #**************
 # sanity check: MUST BE TRUE

@ -164,8 +209,8 @@ p3 = p2 +
  theme(legend.position = "bottom"
        #, legend.title = element_blank()
        , legend.title = element_text("Amino acid properties", size = 20)
-        , legend.text = element_text( size = 20)
-        , axis.text.x = element_text(size = 20, angle = 90)
+        , legend.text = element_text(size = 20)
+        , axis.text.x = element_text(size = 17, angle = 90)
        , axis.text.y = element_blank()
        , axis.title.x = element_text(size = 22))+
  
--- a/scripts/plotting/logo_plot.R
+++ b/scripts/plotting/logo_plot.R
@ -15,9 +15,36 @@ source("../functions/combining_dfs_plotting.R")
 ###########################################################
 # command line args
 #********************
-drug = 'streptomycin'
-gene = 'gid'
+#drug = 'streptomycin'
+#gene = 'gid'
+#********************
+# !!!FUTURE TODO!!!
+# Can pass additional params of output/plot dir by user. 
+# Not strictly required for my workflow  since it is optimised
+# to have a streamlined input/output flow without filename worries.
+#********************
+spec = matrix(c(
+  "drug"       , "d",  1, "character",
+  "gene"       , "g",  1, "character",
+  "data_file1" , "fa", 2, "character",
+  "data_file2" , "fb", 2, "character" 
+), byrow = TRUE, ncol = 4)

+opt = getopt(spec)
+
+#FIXME: detect if script running from cmd, then set these
+drug            = opt$drug
+gene            = opt$gene
+infile_params   = opt$data_file1
+infile_metadata = opt$data_file2
+
+# hardcoding when not using cmd
+#drug = "streptomycin"
+#gene = "gid"
+
+if(is.null(drug)|is.null(gene)) {
+  stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)")
+}
 #===========
 # input
 #===========
@ -29,8 +56,8 @@ import_dirs(drug, gene)
 #---------------------------
 # call: plotting_data()
 #---------------------------
-if (!exists("infile_params") && exists("gene")){
-  #if (!is.character(infile_params) && exists("gene")){
+#if (!exists("infile_params") && exists("gene")){
+if (!is.character(infile_params) && exists("gene")){
  #in_filename_params = paste0(tolower(gene), "_all_params.csv") 
  in_filename_params = paste0(tolower(gene), "_comb_afor.csv") # part combined for gid
  infile_params = paste0(outdir, "/", in_filename_params)
@ -38,14 +65,15 @@ if (!exists("infile_params") && exists("gene")){
 }

 # Input 1: read <gene>_comb_afor.csv
-pd_df = plotting_data(infile_params)
+my_df = read.csv(infile_params, header = T)
+pd_df = plotting_data(my_df)
 my_df_u       = pd_df[[1]] # this forms one of the input for combining_dfs_plotting()

 #--------------------------------
 # call: combining_dfs_plotting()
 #--------------------------------
-if (!exists("infile_metadata") && exists("gene")){
-  #if (!is.character(infile_params) && exists("gene")){{
+#if (!exists("infile_metadata") && exists("gene")){
+if (!is.character(infile_params) && exists("gene")){
  in_filename_metadata = paste0(tolower(gene), "_metadata.csv") # part combined for gid
  infile_metadata = paste0(outdir, "/", in_filename_metadata)
  cat("\nInput file for gene metadata not specified, assuming filename: ", infile_metadata, "\n")
@ -63,7 +91,7 @@ all_plot_dfs = combining_dfs_plotting(my_df_u
                                      , lig_dist_colname = 'ligand_distance'
                                      , lig_dist_cutoff = 10)

-merged_df2        = all_plot_dfs[[1]]
+#merged_df2        = all_plot_dfs[[1]]
 merged_df3        = all_plot_dfs[[2]]
 #merged_df2_comp   = all_plot_dfs[[3]]
 #merged_df3_comp   = all_plot_dfs[[4]]
@ -146,6 +174,7 @@ wide_df_or <- logo_data_or %>% spread(position, or_mychisq, fill = 0.0)

 wide_df_or = as.matrix(wide_df_or)
 rownames(wide_df_or) = wide_df_or[,1]
+dim(wide_df_or)
 wide_df_or = wide_df_or[,-1]
 str(wide_df_or)