From 4398c049ca98ef015c422edee370ff5cb9157718 Mon Sep 17 00:00:00 2001
From: Tanushree Tunstall <tanu@tunstall.in>
Date: Wed, 23 Sep 2020 11:12:41 +0100
Subject: [PATCH] added foldx scaled and foldx outcome to plotting_data.R

---
 scripts/plotting/extreme_muts.R     | 16 +---------
 scripts/plotting/other_plots_data.R |  3 +-
 scripts/plotting/plotting_data.R    | 46 +++++++++++++++++++++++++++++
 3 files changed, 48 insertions(+), 17 deletions(-)

diff --git a/scripts/plotting/extreme_muts.R b/scripts/plotting/extreme_muts.R
index 52e50a9..65943aa 100644
--- a/scripts/plotting/extreme_muts.R
+++ b/scripts/plotting/extreme_muts.R
@@ -118,7 +118,6 @@ df_s_foldx = df[df$foldx_outcome == "Stabilising",]
 hs_foldx = df_s_foldx[df_s_foldx$ddg == min(df_s_foldx$ddg), ]
 hs_foldx
 
-
 #===============
 # active site muts
 #===============
@@ -132,8 +131,6 @@ cat("No. of active site residues within", aa_dist, ":", nrow(aa_muts))
 #====================
 # budding hotspots
 #====================
-
-# Method 
 # this is what you want
 foo = merged_df3 %>% group_by(position) %>% tally()
 bar = merged_df3 %>% group_by(position) %>% count()
@@ -149,15 +146,4 @@ n_mult_muts_sites = sum(table(foo$n)) - (table(foo$n)[[1]] - table(foo$n)[[2]])
 cat("No of budding hotspots (sites with 2 mutations):", n_budding_sites
     , "\nNo. of sites with mutiple (>2) mutations:", n_mult_muts_sites)
 
-# another way
-setDT(merged_df3)[, pos_count := .N, by = .(position)]
-
-# this is cummulative
-table(merged_df3$pos_count)
-
-# use group by on this: same as the 
-snpsBYpos_df <- merged_df3 %>%
-  group_by(position) %>%
-  summarize(snpsBYpos = mean(pos_count))
-
-
+#==========================================================================
\ No newline at end of file
diff --git a/scripts/plotting/other_plots_data.R b/scripts/plotting/other_plots_data.R
index c89a010..df5c1e3 100644
--- a/scripts/plotting/other_plots_data.R
+++ b/scripts/plotting/other_plots_data.R
@@ -59,7 +59,6 @@ if (my_min == -1 && my_max == 1){
   cat("FAIL: could not scale foldx ddg values"
       , "Aborting!")
 }
- 
 
 #================================
 # adding foldx outcome category
@@ -76,7 +75,7 @@ if ( all(c1 == c2) ){
   cat("FAIL: foldx outcome could not be created. Aborting!")
   exit()
 }
-
+#=======================================================================
 # name tidying
 df_ps$mutation_info = as.factor(df_ps$mutation_info)
 df_ps$duet_outcome = as.factor(df_ps$duet_outcome)
diff --git a/scripts/plotting/plotting_data.R b/scripts/plotting/plotting_data.R
index 6f6c3ec..0c15a78 100755
--- a/scripts/plotting/plotting_data.R
+++ b/scripts/plotting/plotting_data.R
@@ -63,6 +63,52 @@ my_df = read.csv(infile_params, header = T)
 
 cat("\nInput dimensions:", dim(my_df)) 
 
+###########################
+# add foldx outcome category
+# and foldx scaled values 
+
+# This ensures that these variables are always available
+# when generating plots
+###########################
+
+#------------------------------
+# adding foldx scaled values
+# scale data b/w -1 and 1
+#------------------------------
+n = which(colnames(my_df) == "ddg"); n  # column index of the raw foldx ddg
+
+my_min = min(my_df[,n]); my_min
+my_max = max(my_df[,n]); my_max
+# negative ddg is divided by |min|, everything else by max
+my_df$foldx_scaled = ifelse(my_df[,n] < 0
+                            , my_df[,n]/abs(my_min)
+                            , my_df[,n]/my_max)
+# sanity check: the scaled extremes must come out as exactly -1 and 1
+my_min = min(my_df$foldx_scaled); my_min
+my_max = max(my_df$foldx_scaled); my_max
+
+if (my_min == -1 && my_max == 1){
+  cat("PASS: foldx ddg successfully scaled b/w -1 and 1"
+      , "\nProceeding with assigning foldx outcome category")
+}else{
+  # actually abort: cat() alone let the script carry on with bad scaling
+  stop("FAIL: could not scale foldx ddg values", " Aborting!")
+}
+
+#------------------------------
+# adding foldx outcome category
+# ddg<0 = "Stabilising" (-ve)
+#------------------------------
+c1 = table(my_df$ddg < 0)
+my_df$foldx_outcome = ifelse(my_df$ddg < 0, "Stabilising", "Destabilising")
+# count from the new column so the check below does not compare c1 to itself
+c2 = table(my_df$foldx_outcome == "Stabilising")
+if ( all(c1 == c2) ){
+  cat("PASS: foldx outcome successfully created")
+}else{
+  # base R has no exit(); stop() halts the script with the intended message
+  stop("FAIL: foldx outcome could not be created. Aborting!")
+}
 
 ###########################
 # extract unique mutation entries