From 6bbc3328b9b3cfd96c0ddf572efed024870361a7 Mon Sep 17 00:00:00 2001
From: Tanushree Tunstall <tanu@tunstall.in>
Date: Thu, 24 Jun 2021 14:21:34 +0100
Subject: [PATCH] added get_plotting_dfs.R as a mother script to be sourced by
 all plotting scripts

---
 scripts/plotting/get_plotting_dfs.R | 155 ++++++++++++++++++++++++++++
 1 file changed, 155 insertions(+)
 create mode 100644 scripts/plotting/get_plotting_dfs.R
diff --git a/scripts/plotting/get_plotting_dfs.R b/scripts/plotting/get_plotting_dfs.R
new file mode 100644
index 0000000..32a0fbc
--- /dev/null
+++ b/scripts/plotting/get_plotting_dfs.R
@@ -0,0 +1,155 @@
+#!/usr/bin/env Rscript
+#########################################################
+# TASK: Build the formatted data frames used by the plotting scripts
+#=======================================================================
+# working dir and loading libraries (the relative source() paths below resolve against this setwd())
+getwd()
+setwd("~/git/LSHTM_analysis/scripts/plotting")
+getwd()
+
+source("Header_TT.R")
+source("../functions/my_pairs_panel.R") # with lower panel turned off
+source("../functions/plotting_globals.R")
+source("../functions/plotting_data.R")
+source("../functions/combining_dfs_plotting.R")
+
+#********************
+# `drug` and `gene` are not set here:
+# they must already be defined by the
+# calling script (examples below)
+#********************
+#drug = 'streptomycin'
+#gene = 'gid'
+#====================
+# ligand-distance column name and cutoff, passed to plotting_data() and combining_dfs_plotting()
+#====================
+
+LigDist_colname = "ligand_distance"
+LigDist_cutoff = 20
+
+#===========
+# input
+#===========
+#---------------------
+# call: import_dirs()
+#---------------------
+import_dirs(drug, gene) # NOTE(review): presumably defines `outdir` used below - confirm
+
+#---------------------------
+# call: plotting_data()
+#---------------------------
+# Fall back to <gene>_comb_afor.csv when infile_params is undefined or is not a path string
+if ((!exists("infile_params") || !is.character(infile_params)) && exists("gene")){ # when running as cmd
+  #in_filename_params = paste0(tolower(gene), "_all_params.csv")
+  in_filename_params = paste0(tolower(gene), "_comb_afor.csv") # part combined for gid
+  infile_params = paste0(outdir, "/", in_filename_params)
+  cat("\nInput file for mcsm comb data not specified, assuming filename: ", infile_params, "\n")
+}
+
+# Input 1: read <gene>_comb_afor.csv (or the caller-supplied infile_params)
+cat("\nReading mcsm combined data file: ", infile_params)
+mcsm_df = read.csv(infile_params, header = TRUE)
+pd_df = plotting_data(mcsm_df
+                      , lig_dist_colname = LigDist_colname
+                      , lig_dist_cutoff = LigDist_cutoff)
+
+my_df       = pd_df[[1]] # plotting_data() returns a list; elements are unpacked by position
+my_df_u     = pd_df[[2]] # this forms one of the input for combining_dfs_plotting()
+my_df_u_lig = pd_df[[3]]
+dup_muts    = pd_df[[4]]
+
+#--------------------------------
+# call: combining_dfs_plotting()
+#--------------------------------
+# Fall back to <gene>_metadata.csv when infile_metadata is undefined or is not a path string
+if ((!exists("infile_metadata") || !is.character(infile_metadata)) && exists("gene")){ # when running as cmd
+  in_filename_metadata = paste0(tolower(gene), "_metadata.csv") # default gene metadata filename
+  infile_metadata = paste0(outdir, "/", in_filename_metadata)
+  cat("\nInput file for gene metadata not specified, assuming filename: ", infile_metadata, "\n")
+}
+
+# Input 2: read <gene>_metadata.csv (or the caller-supplied infile_metadata)
+cat("\nReading meta data file: ", infile_metadata)
+
+gene_metadata <- read.csv(infile_metadata
+                          , stringsAsFactors = FALSE
+                          , header = TRUE)
+
+all_plot_dfs = combining_dfs_plotting(my_df_u
+                                      , gene_metadata
+                                      , lig_dist_colname = LigDist_colname
+                                      , lig_dist_cutoff = LigDist_cutoff)
+
+merged_df2          = all_plot_dfs[[1]] # the returned list is unpacked by position
+merged_df3          = all_plot_dfs[[2]]
+merged_df2_comp     = all_plot_dfs[[3]]
+merged_df3_comp     = all_plot_dfs[[4]]
+merged_df2_lig      = all_plot_dfs[[5]]
+merged_df3_lig      = all_plot_dfs[[6]]
+merged_df2_comp_lig = all_plot_dfs[[7]]
+merged_df3_comp_lig = all_plot_dfs[[8]]
+
+####################################################################
+#                        Data for logoplots
+####################################################################
+#-------------------------
+# choose df for logoplot: merged_df3 is active, merged_df3_comp is the commented-out alternative
+#-------------------------
+logo_data = merged_df3
+#logo_data = merged_df3_comp
+
+# quick checks: column names, structure, row count and number of distinct positions
+colnames(logo_data)
+str(logo_data)
+
+c1 = unique(logo_data$position) # distinct values of the position column
+nrow(logo_data)
+cat("No. of rows in my_data:", nrow(logo_data)
+    , "\nDistinct positions corresponding to snps:", length(c1)
+    , "\n===========================================================")
+#=======================================================================
+#%% logo data: OR, reshaped wide (one row per mutant_type, one column per position)
+
+#############
+# DATA FOR PLOTS
+#############
+foo = logo_data[, c("position"
+                      , "mutant_type","duet_scaled", "or_mychisq"
+                      , "mut_prop_polarity", "mut_prop_water")] # NOTE(review): not used again in this file
+
+logo_data$log10or = log10(logo_data$or_mychisq)
+logo_data_plot = logo_data[, c("position"
+                            , "mutant_type", "or_mychisq", "log10or")]
+
+logo_data_plot_or = logo_data[, c("position", "mutant_type", "or_mychisq")]
+wide_df_or <- logo_data_plot_or %>% spread(position, or_mychisq, fill = 0.0) # absent pairs become 0
+
+wide_df_or = as.matrix(wide_df_or) # NOTE(review): includes the mutant_type column, so likely a character matrix - confirm
+rownames(wide_df_or) = wide_df_or[,1] # first column (mutant_type) becomes the row names
+dim(wide_df_or)
+wide_df_or = wide_df_or[, -1, drop = FALSE] # drop = FALSE keeps a matrix even if one row/column remains
+str(wide_df_or)
+
+position_or = as.numeric(colnames(wide_df_or)) # positions recovered from the column names
+
+#==================
+# logo data: logOR
+#==================
+# wide table of log10(OR): one row per mutant_type, one column per position, absent pairs filled with 0
+logo_data_plot_logor = logo_data[, c("position", "mutant_type", "log10or")]
+wide_df_logor <- logo_data_plot_logor %>% spread(position, log10or, fill = 0.0)
+
+wide_df_logor = as.matrix(wide_df_logor)
+
+rownames(wide_df_logor) = wide_df_logor[,1] # first column (mutant_type) becomes the row names
+wide_df_logor = subset(wide_df_logor, select = -c(1) ) # then drop that first column
+colnames(wide_df_logor)
+wide_df_logor_m = data.matrix(wide_df_logor) # NOTE(review): input is already a matrix here; confirm the result is numeric if that was the intent
+
+rownames(wide_df_logor_m)
+colnames(wide_df_logor_m)
+
+position_logor = as.numeric(colnames(wide_df_logor_m)) # positions recovered from the column names
+########################################################################
+#                           End of script
+########################################################################
\ No newline at end of file