From ca1a0e10cad5c3d5904dbddc14c3115e258370ad Mon Sep 17 00:00:00 2001
From: Tanushree Tunstall <tanu@tunstall.in>
Date: Mon, 14 Jun 2021 09:06:30 +0100
Subject: [PATCH 1/4] added example for layout

---
 scripts/plotting/myshiny/app_layout_eg.R | 106 +++++++++++++++++++++++
 1 file changed, 106 insertions(+)
 create mode 100644 scripts/plotting/myshiny/app_layout_eg.R

diff --git a/scripts/plotting/myshiny/app_layout_eg.R b/scripts/plotting/myshiny/app_layout_eg.R
new file mode 100644
index 0000000..d7943fd
--- /dev/null
+++ b/scripts/plotting/myshiny/app_layout_eg.R
@@ -0,0 +1,106 @@
+# 
+# 
+# Home
+# Public
+# 
+# Questions
+# Tags
+# Users
+# Find a Job
+# Jobs
+# Companies
+# 
+# Teams
+# Stack Overflow for Teams – Collaborate and share knowledge with a private group. 
+# 
+# How can put multiple plots side-by-side in shiny r?
+#     Asked 5 years, 5 months ago
+# Active 2 years, 1 month ago
+# Viewed 59k times
+# 28
+# 11
+# 
+# In mainpanel, I try to handle this problem via fluidrow. However, one of my plot is optional to be displayed or not by users. When user clicks the button, the second plot appears below the first plot.
+# 
+# fluidRow(
+#     column(2, align="right",
+#            plotOutput(outputId = "plotgraph1", width  = "500px",height = "400px"),  
+#            plotOutput(outputId = "plotgraph2", width  = "500px",height = "400px")
+#     ))
+# 
+# I played with "align" and "widths", but nothing changed.
+# r
+# plot
+# ggplot2
+# shiny
+# Share
+# Improve this question
+# Follow
+# edited Jan 21 '17 at 17:51
+# Mike Wise
+# 18.8k66 gold badges7171 silver badges9595 bronze badges
+# asked Dec 20 '15 at 19:25
+# can.u
+# 42511 gold badge44 silver badges1111 bronze badges
+# Add a comment
+# 3 Answers
+# 30
+# 
+# So it is a couple years later, and while the others answers - including mine - are still valid, it is not how I would recommend approaching it today. Today I would lay it out using the grid.arrange from the gridExtra package.
+# 
+# It allows any number of plots, and can lay them out in a grid checkerboard-like. (I was erroneously under the impression splitLayout only worked with two).
+# It has more customization possibilities (you can specify rows, columns, headers, footer, padding, etc.)
+# It is ultimately easier to use, even for two plots, since laying out in the UI is finicky - it can be difficult to predict what Bootstrap will do with your elements when the screen size changes.
+# Since this question gets a lot of traffic, I kind of think more alternative should be here.
+# 
+# The cowplot package is also worth looking into, it offers similar functionality, but I am not so familiar with it.
+# 
+# Here is a small shiny program demonstrating that:
+#     
+library(shiny)
+library(ggplot2)
+library(gridExtra)
+
+u <- shinyUI(fluidPage(
+    titlePanel("title panel"),
+    sidebarLayout(position = "left",
+                  sidebarPanel("sidebar panel",
+                               checkboxInput("donum1", "Make #1 plot", value = T),
+                               checkboxInput("donum2", "Make #2 plot", value = F),
+                               checkboxInput("donum3", "Make #3 plot", value = F),
+                               sliderInput("wt1","Weight 1",min=1,max=10,value=1),
+                               sliderInput("wt2","Weight 2",min=1,max=10,value=1),
+                               sliderInput("wt3","Weight 3",min=1,max=10,value=1)
+                  ),
+                  mainPanel("main panel",
+                            column(6,plotOutput(outputId="plotgraph", width="500px",height="400px"))
+                  ))))
+
+s <- shinyServer(function(input, output) 
+{
+    set.seed(123)
+    pt1 <- reactive({
+        if (!input$donum1) return(NULL)
+        qplot(rnorm(500),fill=I("red"),binwidth=0.2,main="plotgraph1")
+    })
+    pt2 <- reactive({
+        if (!input$donum2) return(NULL)
+        qplot(rnorm(500),fill=I("blue"),binwidth=0.2,main="plotgraph2")
+    })
+    pt3 <- reactive({
+        if (!input$donum3) return(NULL)
+        qplot(rnorm(500),fill=I("green"),binwidth=0.2,main="plotgraph3")
+    })
+    output$plotgraph = renderPlot({
+        ptlist <- list(pt1(),pt2(),pt3())
+        wtlist <- c(input$wt1,input$wt2,input$wt3)
+        # remove the null plots from ptlist and wtlist
+        to_delete <- !sapply(ptlist,is.null)
+        ptlist <- ptlist[to_delete] 
+        wtlist <- wtlist[to_delete]
+        if (length(ptlist)==0) return(NULL)
+        
+        grid.arrange(grobs=ptlist,widths=wtlist,ncol=length(ptlist))
+    })
+})
+shinyApp(u,s)

From 81ab3fe5ba3a8b7c6f517ce6face85bdaa3fe4cd Mon Sep 17 00:00:00 2001
From: Tanushree Tunstall <tanu@tunstall.in>
Date: Mon, 14 Jun 2021 09:22:05 +0100
Subject: [PATCH 2/4] added function and test for aa_prop_bp.R

---
 scripts/functions/aa_prop_bp.R      | 22 ++++++++++
 scripts/functions/test_aa_prop_bp.R | 62 +++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+)
 create mode 100644 scripts/functions/aa_prop_bp.R
 create mode 100644 scripts/functions/test_aa_prop_bp.R

diff --git a/scripts/functions/aa_prop_bp.R b/scripts/functions/aa_prop_bp.R
new file mode 100644
index 0000000..3eb9da6
--- /dev/null
+++ b/scripts/functions/aa_prop_bp.R
@@ -0,0 +1,22 @@
+library(ggplot2)
+library(tidyverse)
+library(data.table)
+########################################################################
+# TASK: barplots to colour nssnps by chosen aa_property
+########################################################################
+aa_prop_bp <- function(plotdf
+                       , position_colname
+                       , fill_colname
+                       , fill_colours
+                       , leg_name = "LEGEND"){
+  mp = ggplot(df, aes(x = as.factor(eval(parse(text = pos_colname)))
+                      #, y = ..count..
+                      , fill = eval(parse(text = fill_colname))
+  )) +
+    geom_bar(position = 'identity') +
+    scale_fill_manual(values = fill_colours
+                      , name = leg_name)
+  return(mp)
+}
+
+
diff --git a/scripts/functions/test_aa_prop_bp.R b/scripts/functions/test_aa_prop_bp.R
new file mode 100644
index 0000000..29931cc
--- /dev/null
+++ b/scripts/functions/test_aa_prop_bp.R
@@ -0,0 +1,62 @@
+library(ggplot2)
+library(tidyverse)
+library(data.table)
+
+setwd("~/git/LSHTM_analysis/scripts/plotting/functions")
+getwd()
+#############################################################
+#===========================================
+# load functions, data, dirs, hardocded vars
+# that will be used in testing the functions
+#===========================================
+source("../plotting_data.R")
+infile = "/home/tanu/git/Data/streptomycin/output/"
+pd_df = plotting_data(infile)
+my_df       = pd_df[[1]]
+my_df_u     = pd_df[[2]]
+my_df_u_lig = pd_df[[3]]
+dup_muts    = pd_df[[4]]
+
+source("../plotting_globals.R")
+drug = "streptomycin"
+gene = "gid"
+
+import_dirs(drug, gene)
+
+#=====================
+# functions to test 
+#=====================
+source("stability_count_bp.R")
+source("position_count_bp.R")
+#################################################################
+##############################################
+# read a sample file containing muts and prop 
+###############################################
+df<- read.csv(file.choose())
+
+setDT(df)[, pos_count := .N, by = .(position)]
+foo = data.frame(df$position, df$pos_count)
+
+#snpsBYpos_df <- df %>%
+#  group_by(position) %>%
+#  summarize(snpsBYpos = mean(pos_count))
+
+# subset df without duplicates for position
+df2 = df[!duplicated(df$position)]
+##################################################################
+# ---------------------------------------
+# barplot for nssnps, coloured by aa prop
+# ---------------------------------------
+pos_colname = "position"
+aa_prop_colname = "mut_prop_water"
+aa_prop_colours = c("black", "blue")
+my_legname = "aa_prop: water"
+
+# call function
+aa_prop_bp(plotdf = df
+           , position_colname = pos_colname
+           , fill_colname = aa_prop_colname
+           , fill_colours = aa_prop_cols
+           , leg_name = my_legname)
+           
+#===============================================================

From 57e4d8cd1e0846ce402d5f141a974712b19f7bd7 Mon Sep 17 00:00:00 2001
From: Tanushree Tunstall <tanu@tunstall.in>
Date: Mon, 14 Jun 2021 13:22:56 +0100
Subject: [PATCH 3/4] changed aa_prop_water to 3 categ according to KD, updated
 ref dict

---
 scripts/mut_electrostatic_changes.py |  8 +++++---
 scripts/reference_dict.py            | 10 ++++++----
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/scripts/mut_electrostatic_changes.py b/scripts/mut_electrostatic_changes.py
index 243ff93..5e74b7c 100755
--- a/scripts/mut_electrostatic_changes.py
+++ b/scripts/mut_electrostatic_changes.py
@@ -5,13 +5,15 @@ Created on Tue Aug  6 12:56:03 2019
 
 @author: tanu
 '''
-
 # FIXME: import dirs.py to get the basic dir paths available
 #=======================================================================
 # TASK: calculate how many mutations result in 
 # electrostatic changes wrt wt
 
-# Input: merged_df3
+# Input: mCSM-normalised file or any file containing one-letter aa code 
+# of wt and mut
+# Note: this can be easily modified into 3-letter code
+# TODO: turn to a function
 
 # Output: mut_elec_changes_results.txt 
 #=======================================================================
@@ -215,4 +217,4 @@ print('Total no. of mutations: ', total_muts
       , '\nTotal no. of unchanged muts: ', no_change_muts.mut_count.sum() 
       , '\n=======================================================')  
 #%% end of script
-#=======================================================================
+#=======================================================================
\ No newline at end of file
diff --git a/scripts/reference_dict.py b/scripts/reference_dict.py
index ee431c3..41aeaf8 100755
--- a/scripts/reference_dict.py
+++ b/scripts/reference_dict.py
@@ -132,9 +132,11 @@ qualities_taylor = { ('R', 'H', 'K'): 'Basic'
              , ('C', 'G', 'P'): 'Special'
 }
 
-# binary classification: hydrophilic or hydrophobic
-qualities_water = { ('D', 'E', 'N', 'P', 'Q', 'R', 'S'): 'hydrophilic'
-                   , ('A', 'C', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'T', 'V', 'W', 'X', 'Y'): 'hydrophobic'
+# ternary classification: hydrophobic --> neutral --> hydrophilic (KD scale)
+#http://www.imgt.org/IMGTeducation/Aide-memoire/_UK/aminoacids/IMGTclasses.html
+qualities_water = { ('I','V','L','F','C','M','A','W'): 'hydrophobic'
+                   , ('G','T','S','Y','P','H'): 'neutral'
+                   , ('N','D','Q','E','K','R'): 'hydrophilic'
 }
 
 # polarity: no overlap
@@ -232,4 +234,4 @@ for k, v in oneletter_aa_dict.items():
 # COMMENT:VOILA! two different keytypes set for dicts containinga all
 # associated aa properties
 #==============================================================================
-#%% end of script     
\ No newline at end of file
+#%% end of script     

From ddb1a7a7aa6622b1c3fa9378cd95600672d27ebb Mon Sep 17 00:00:00 2001
From: Tanushree Tunstall <tanu@tunstall.in>
Date: Mon, 14 Jun 2021 13:24:00 +0100
Subject: [PATCH 4/4] added aa_prop.py and add_aa_prop.py to add aa properties
 for wt and mutant in a given file containing one letter code wt and mut cols
 as csv

---
 scripts/aa_prop.py     | 113 ++++++++++++++++++++++++
 scripts/add_aa_prop.py | 191 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 304 insertions(+)
 create mode 100644 scripts/aa_prop.py
 create mode 100644 scripts/add_aa_prop.py

diff --git a/scripts/aa_prop.py b/scripts/aa_prop.py
new file mode 100644
index 0000000..6ed9222
--- /dev/null
+++ b/scripts/aa_prop.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+'''
+Created on Mon June 14 2021
+
+@author: tanu
+'''
+# FIXME: import dirs.py to get the basic dir paths available
+#=======================================================================
+# TASK
+
+# Input:
+
+# Output: 
+#=======================================================================
+#%% load libraries
+import os, sys
+import pandas as pd
+#import numpy as np
+#from varname import nameof
+import argparse
+
+DEBUG = False
+#=======================================================================
+#%% specify input and curr dir
+homedir = os.path.expanduser('~')
+
+# set working dir
+os.getcwd()
+os.chdir(homedir + '/git/LSHTM_analysis/scripts')
+os.getcwd()
+
+from reference_dict import oneletter_aa_dict 
+#=======================================================================
+#%%
+def get_aa_prop(df, col1 = 'aap1', col2 = 'aap2', col3 = 'aap_taylor', col4 = 'aap_kd', col5 = 'aap_polarity', col6 = 'aap_calcprop'):
+
+    """Add amino acid properties for wt and mutant residues
+    
+    @df: df containing one letter aa code for wt and mutant respectively
+    @type: pandas df
+    
+    @col1: column adding 7 aa categories (no overlap; acidic, basic, amidic, hydrophobic, hydroxylic, aromatic, sulphur)
+    @type: str
+    
+    @col2: column adding 9 aa categories (overlap; acidic, basic, polar, hydrophobic, hydrophilic, small, aromatic, aliphatic, special)
+    @type: str
+    
+    @col3: column adding 8 aa categories (overlap; acidic, basic, polar, hydrophobic, small, aromatic, aliphatic, special)
+    @type: str
+
+    @col4: column adding 3 aa categories (no overlap, hydrophobic, neutral and hydrophilic according to KD scale)
+    @type: str
+
+    @col5: column adding 4 aa categories (no overlap, acidic, basic, neutral, non-polar)
+    @type: str
+
+    @col6: column adding 4 aa categories (neg, pos, polar, non-polar)
+    @type: str
+    
+    returns df: with 6 added columns. If column names clash, the function column 
+                name will override original column
+    @rtype: pandas df
+    """
+
+    lookup_dict_p1 = dict()
+    lookup_dict_p2 = dict()
+    
+    lookup_dict_taylor = dict()
+    lookup_dict_kd = dict()
+    
+    lookup_dict_polarity = dict()
+    lookup_dict_calcprop = dict()
+    
+    for k, v in oneletter_aa_dict.items():
+      
+        lookup_dict_p1[k] = v['aa_prop1']
+        
+        lookup_dict_p2[k] = v['aa_prop2']
+              
+        lookup_dict_taylor[k] = v['aa_taylor']
+        
+        lookup_dict_kd[k] = v['aa_prop_water']
+                
+        lookup_dict_polarity[k] = v['aa_prop_polarity']
+        
+        lookup_dict_calcprop[k] = v['aa_calcprop']      
+        
+        #if DEBUG:
+        #    print('Key:', k, 'value:', v
+        #          , '\n============================================================'
+        #          , '\nlook up dict:\n')
+            
+        df['wt_aap1'] = df['wild_type'].map(lookup_dict_p1)   
+        df['mut_aap1'] = df['mutant_type'].map(lookup_dict_p1)    
+            
+        df['wt_aap2'] = df['wild_type'].map(lookup_dict_p2)   
+        df['mut_aap2'] = df['mutant_type'].map(lookup_dict_p2)   
+
+        df['wt_aap_taylor'] = df['wild_type'].map(lookup_dict_taylor)   
+        df['mut_aap_taylor'] = df['mutant_type'].map(lookup_dict_taylor)   
+
+        df['wt_aap_kd'] = df['wild_type'].map(lookup_dict_kd)   
+        df['mut_aap_kd'] = df['mutant_type'].map(lookup_dict_kd)   
+
+        df['wt_aap_polarity'] = df['wild_type'].map(lookup_dict_polarity)   
+        df['mut_aap_polarity'] = df['mutant_type'].map(lookup_dict_polarity)           
+                    
+        df['wt_aa_calcprop'] = df['wild_type'].map(lookup_dict_calcprop)   
+        df['mut_aa_calcprop'] = df['mutant_type'].map(lookup_dict_calcprop)
+
+    return df
+#========================================
\ No newline at end of file
diff --git a/scripts/add_aa_prop.py b/scripts/add_aa_prop.py
new file mode 100644
index 0000000..ed2466d
--- /dev/null
+++ b/scripts/add_aa_prop.py
@@ -0,0 +1,191 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+'''
+Created on Tue Aug  6 12:56:03 2019
+
+@author: tanu
+'''
+# FIXME: import dirs.py to get the basic dir paths available
+#=======================================================================
+# TASK: 
+# 
+
+# Input: 
+
+# Output: 
+#=======================================================================
+#%% load libraries
+import os, sys
+import pandas as pd
+#import numpy as np
+#from varname import nameof
+import argparse
+#=======================================================================
+#%% specify input and curr dir
+homedir = os.path.expanduser('~')
+
+# set working dir
+os.getcwd()
+os.chdir(homedir + '/git/LSHTM_analysis/scripts')
+os.getcwd()
+
+from aa_prop import get_aa_prop 
+#=======================================================================
+#%% command line args
+arg_parser = argparse.ArgumentParser()
+arg_parser.add_argument('-d', '--drug', help='drug name', default = None)
+arg_parser.add_argument('-g', '--gene', help='gene name', default = None)
+
+arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data')
+arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + <drug> + input')
+arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + <drug> + output')
+
+arg_parser.add_argument('--debug', action ='store_true', help = 'Debug Mode') # not used atm
+
+args = arg_parser.parse_args()
+#%% variable assignment: input and output 
+drug    = args.drug
+gene    = args.gene
+datadir = args.datadir
+indir   = args.input_dir
+outdir  = args.output_dir
+
+#%%=======================================================================
+#==============
+# directories
+#==============
+if not datadir:
+    datadir = homedir + '/' + 'git/Data'
+    
+if not indir:
+    indir = datadir + '/' + drug + '/input'
+    
+if not outdir:
+    outdir = datadir + '/' + drug + '/output'
+
+#=======
+# input
+#=======
+#in_filename  = 'merged_df3.csv'
+#in_filename  = gene.lower() + '_complex_mcsm_norm.csv'
+in_filename_mcsm  = gene.lower() + '_complex_mcsm_norm_SRY.csv' # gid
+infile_mcsm = outdir + '/' + in_filename_mcsm
+print('Input file: ', infile_mcsm
+      , '\n============================================================')
+
+#=======
+# output 
+#=======
+out_filename_aaps = gene.lower() + '_mcsm_aaps.txt'
+outfile_aaps =  outdir + '/' + out_filename_aaps
+print('Output file: ', outfile_aaps
+      , '\n============================================================')
+
+#%% end of variable assignment for input and output files
+#=======================================================================
+#%% Read input files
+print('Reading input file (merged file):', infile_mcsm)
+
+comb_df = pd.read_csv(infile_mcsm, sep = ',')  
+
+print('Input filename: ', in_filename_mcsm
+      , '\nPath :', outdir
+      , '\nNo. of rows: ', len(comb_df)
+      , '\nNo. of cols: ', len(comb_df.columns)
+      , '\n============================================================')
+
+# column names
+list(comb_df.columns)
+
+#%% sanity check
+nrows_df = len(comb_df)
+ncols_add =  12
+expected_cols = len(comb_df.columns) + ncols_add
+print('\nAdding aa properties for wt and mutant: ', ncols_add, ' columns in total'
+	, '\n===============================================================')
+
+#%% call get_aa_prop():
+get_aa_prop(df = comb_df)
+
+#%% check dim of df
+if len(comb_df) == nrows_df and len(comb_df.columns) == expected_cols:
+    print('Checking dim of df: '
+          , '\nPASS: df dim match'
+          , '\nno.of rows: ', len(comb_df)
+          , '\nno. of cols: ', len(comb_df.columns))
+else:
+    print('\FAIL: dim mismatch'
+             , 'Expected rows: ', nrows_df
+             , '\Got: ', len(comb_df)
+             , '\nExpected cols: ', expected_cols
+             , '\nGot: ', len(comb_df.columns))
+    sys.exit('Aborting')
+#%% summary output
+sys.stdout = open(outfile_aaps, 'w')
+
+print('################################################'
+      , '\n aa properties: ', gene 
+      , '\n################################################')
+
+print('\n-------------------'
+      , '\nWild-type: aap 1'
+      , '\n-------------------\n'
+      , comb_df['wt_aap1'].value_counts()
+      , '\n-------------------'
+      , '\nmutant-type: aap 1'
+      , '\n-------------------\n'
+      , comb_df['mut_aap1'].value_counts()
+      , '\n===================================================')
+
+print('\n-------------------'
+      , '\nWild-type: aap 2'
+      , '\n-------------------\n'
+      , comb_df['wt_aap2'].value_counts()
+      , '\n-------------------'
+      , '\nmutant-type: aap 2'
+      , '\n-------------------\n'
+      , comb_df['mut_aap2'].value_counts()
+      , '\n===================================================')
+
+print('\n-------------------'
+      , '\nWild-type: aap taylor'
+      , '\n-------------------\n'
+      , comb_df['wt_aap_taylor'].value_counts()
+      , '\n-------------------'
+      , '\nmutant-type: taylor'
+      , '\n-------------------\n'
+      , comb_df['mut_aap_taylor'].value_counts()
+      , '\n===================================================')
+
+print('\n-------------------'
+      , '\nWild-type: aap water/kd'
+      , '\n-------------------\n'
+      , comb_df['wt_aap_kd'].value_counts()
+      , '\n-------------------'
+      , '\nmutant-type: water/kd'
+      , '\n-------------------\n'
+      , comb_df['mut_aap_kd'].value_counts()
+      , '\n===================================================')
+
+print('\n-------------------'
+      , '\nWild-type: aap polarity'
+      , '\n-------------------\n'
+      , comb_df['wt_aap_polarity'].value_counts()
+      , '\n-------------------'
+      , '\nmutant-type: polarity'
+      , '\n-------------------\n'
+      , comb_df['mut_aap_polarity'].value_counts()
+      , '\n===================================================')
+
+print('\n-------------------'
+      , '\nWild-type: aa calcprop'
+      , '\n-------------------\n'
+      , comb_df['wt_aa_calcprop'].value_counts()
+      , '\n-------------------'
+      , '\nmutant-type: aa calcprop'
+      , '\n-------------------\n'
+      , comb_df['mut_aa_calcprop'].value_counts()
+      , '\n===================================================')
+
+#%% end of script
+#=======================================================================