Merge branch 'master' of https://git.tunstall.in/tanu/LSHTM_analysis

2021-02-09 16:12:34 +00:00 · 2021-02-09 16:12:34 +00:00 · bcf4467c44
commit bcf4467c44
parent 6b6921d45f 64018cce4c
17 changed files with 1538 additions and 109 deletions
--- a/dynamut/dynamut.py
+++ b/dynamut/dynamut.py
@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Aug 19 14:33:51 2020
+
+@author: tanu
+"""
+
+
+#%% load packages
+import os,sys
+import subprocess
+import argparse
+import requests
+import re
+import time
+from bs4 import BeautifulSoup
+import pandas as pd
+from pandas.api.types import is_string_dtype
+from pandas.api.types import is_numeric_dtype
+#%%============================================================================
+
+#1) define muts batch
+#take mcsm file
+#split into 'n' batches
+#write output file with suffix of batch number
+
+
+#********** done this part ****************
+#2) get results for a batch url
+# read  file
+# store batch url
+#extract  number
+#build single url
+#build single results urls
+#get results and store them in df
+#update df 
+#dim of df = no. of muts in batch
+
+#3) format results
+# store unit measurements separately
+# omit unit measurements from cols
+# create extra columns '_outcome' suffix by splitting numerical output
+# create separate col for  mcsm as it doesn't have output text
+      
+#%%============================================================================
--- a/dynamut/dynamut_test.py
+++ b/dynamut/dynamut_test.py
@ -0,0 +1,101 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Aug 19 14:33:51 2020
+
+@author: tanu
+"""
+
+
+#%% load packages
+import os,sys
+import subprocess
+import argparse
+import requests
+import re
+import time
+from bs4 import BeautifulSoup
+import pandas as pd
+from pandas.api.types import is_string_dtype
+from pandas.api.types import is_numeric_dtype
+#%%============================================================================
+
+# Results page of one DynaMut batch job (the trailing digits are the job id).
+batch_result_url = 'http://biosig.unimelb.edu.au/dynamut/results_prediction/15955901077'
+
+# Per-mutation results page: single_results/<job id>/<mutation>
+mut = 'S104R'
+single_result_url = 'http://biosig.unimelb.edu.au/dynamut/single_results/15955901077' + '/' + mut
+
+
+
+#%%============================================================================
+# Fetch the results page for the single mutation above and, on HTTP 200,
+# collect the text of six page elements (looked up by id) into a one-row
+# DataFrame, results_df.
+param_dict = {}
+
+result_response = requests.get(single_result_url)
+# NOTE(review): nothing happens on a non-200 response, so results_df is left
+# undefined in that case.
+if result_response.status_code == 200:
+        print('Fetching results')
+        # extract results using the html parser
+        soup = BeautifulSoup(result_response.text, features = 'html.parser')
+        #web_result_raw = soup.find(id = 'predictions').get_text()
+        # NOTE(review): presumably soup.find() yields None for a missing id,
+        # which would make .get_text() fail - confirm against the bs4 docs.
+        ddg_dynamut = soup.find(id = 'ddg_dynamut').get_text()
+        ddg_encom = soup.find(id = 'ddg_encom').get_text()
+        ddg_mcsm = soup.find(id = 'ddg_mcsm').get_text()
+        ddg_sdm = soup.find(id = 'ddg_sdm').get_text()
+        ddg_duet = soup.find(id = 'ddg_duet').get_text()
+        dds_encom = soup.find(id = 'dds_encom').get_text()
+        
+        # one key per output column; the values are the raw element texts
+        param_dict = {"mutationinformation" : mut
+            , "ddg_dynamut" : ddg_dynamut
+            , "ddg_encom"   : ddg_encom
+            , "ddg_mcsm"    : ddg_mcsm
+            , "ddg_sdm"     : ddg_sdm
+            , "ddg_duet"    : ddg_duet
+            , "dds_encom"   : dds_encom
+            
+            }
+        # orient='index' yields one row per key; .T turns that into a single
+        # row with one column per key
+        results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T  
+
+#%% for loop
+#%%
+# Fetch the per-mutation DynaMut results page for every mutation in `muts`
+# and accumulate one row per mutation in `dynamut_results_df`.
+host_dynamut = 'http://biosig.unimelb.edu.au/dynamut'
+# the job id is the trailing run of digits of the batch results url
+batch_url_number = re.search(r'([0-9]+)$', batch_result_url).group(0)
+single_url = host_dynamut + '/single_results/' + batch_url_number
+
+muts = ["S104R", "G24R"]
+
+# initialise empty df
+dynamut_results_df = pd.DataFrame()
+
+# One single-row frame per successfully fetched mutation. The frames are
+# combined with pd.concat because DataFrame.append was removed in pandas 2.0.
+result_frames = []
+
+for i, mut in enumerate(muts):
+    print('Running mutation', i, ':', mut)
+    snp = mut
+    single_result_url = single_url + '/' + snp
+    print('Getting results from:',  single_result_url)
+
+    result_response = requests.get(single_result_url)
+    if result_response.status_code != 200:
+        # report the failure instead of silently dropping the mutation
+        print('WARNING: no results fetched for', snp
+              , '- HTTP status:', result_response.status_code)
+        continue
+
+    print('Fetching results')
+    # extract results using the html parser
+    soup = BeautifulSoup(result_response.text, features = 'html.parser')
+    ddg_dynamut = soup.find(id = 'ddg_dynamut').get_text()
+    ddg_encom = soup.find(id = 'ddg_encom').get_text()
+    ddg_mcsm = soup.find(id = 'ddg_mcsm').get_text()
+    ddg_sdm = soup.find(id = 'ddg_sdm').get_text()
+    ddg_duet = soup.find(id = 'ddg_duet').get_text()
+    dds_encom = soup.find(id = 'dds_encom').get_text()
+
+    param_dict = {"mutationinformation" : snp
+        , "ddg_dynamut" : ddg_dynamut
+        , "ddg_encom"   : ddg_encom
+        , "ddg_mcsm"    : ddg_mcsm
+        , "ddg_sdm"     : ddg_sdm
+        , "ddg_duet"    : ddg_duet
+        , "dds_encom"   : dds_encom
+        }
+    # one row, one column per key
+    results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T
+    print(results_df)
+
+    # Rebuild the combined frame after every mutation so the running result
+    # is printed exactly as before; each row keeps its own index label 0,
+    # as it did with DataFrame.append.
+    result_frames.append(results_df)
+    dynamut_results_df = pd.concat(result_frames)
+    print(dynamut_results_df)
+            
+      
--- a/foldx/mutrenamefiles.sh
+++ b/foldx/mutrenamefiles.sh
@ -2,7 +2,7 @@ PDB=$1
 n=$2
 OUTDIR=$3
 cd ${OUTDIR}
-logger "Running mutrenamefiles with PDB: ${PDB} n: ${n} OUTDIR: ${OUTDIR}"
+
 cp Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout Matrix_Hbonds_${PDB}_Repair_${n}_PN.txt
 sed -n '5,190p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_RR_${PDB}_Repair_${n}_PN.txt
 sed -n '194,379p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_MM_${PDB}_Repair_${n}_PN.txt
@ -61,9 +61,3 @@ cp InteractingResidues_Volumetric_${PDB}_Repair_${n}_PN.fxout InteractingResidue
 sed -i '1,5d' InteractingResidues_Volumetric_${PDB}_Repair_${n}_PN.txt
 cp InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.txt
 sed -i '1,5d' InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.txt
-
-
-
-
-
-
--- a/foldx/renamefiles.sh
+++ b/foldx/renamefiles.sh
@ -1,7 +1,7 @@
 PDB=$1
 OUTDIR=$2
 cd ${OUTDIR}
-logger "Running renamefiles"
+
 cp Dif_${PDB}_Repair.fxout Dif_${PDB}_Repair.txt
 sed -i '1,8d' Dif_${PDB}_Repair.txt
 cp Matrix_Hbonds_${PDB}_Repair_PN.fxout Matrix_Hbonds_${PDB}_Repair_PN.txt
@ -62,9 +62,3 @@ cp InteractingResidues_Volumetric_${PDB}_Repair_PN.fxout InteractingResidues_Vol
 sed -i '1,5d' InteractingResidues_Volumetric_${PDB}_Repair_PN.txt
 cp InteractingResidues_Disulfide_${PDB}_Repair_PN.fxout InteractingResidues_Disulfide_${PDB}_Repair_PN.txt
 sed -i '1,5d' InteractingResidues_Disulfide_${PDB}_Repair_PN.txt
-
-
-
-
-
-
--- a/foldx/runFoldx.py
+++ b/foldx/runFoldx.py
@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 import subprocess
 import os
+import sys
 import numpy as np
 import pandas as pd
 from contextlib import suppress
@ -8,6 +9,8 @@ from pathlib import Path
 import re
 import csv
 import argparse
+import shutil
+import time
 #https://realpython.com/python-pathlib/

 # FIXME
@ -22,8 +25,8 @@ homedir = os.path.expanduser('~')

 # set working dir
 os.getcwd()
-os.chdir(homedir + '/git/LSHTM_analysis/foldx/')
-os.getcwd()
+#os.chdir(homedir + '/git/LSHTM_analysis/foldx/')
+#os.getcwd()

 #=======================================================================
 #%% command line args
@ -35,11 +38,12 @@ arg_parser.add_argument('-g', '--gene',     help = 'gene name (case sensitive)',
 arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data')
 arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + <drug> + input')
 arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + <drug> + output')
-arg_parser.add_argument('-p', '--process_dir', help = 'Temp processing dir for running foldX. By default, it assmes homedir + <drug> + processing. Make sure it is somewhere with LOTS of storage as it writes all output!')
+arg_parser.add_argument('-p', '--process_dir', help = 'Temp processing dir for running foldX. By default, it assmes homedir + <drug> + processing. Make sure it is somewhere with LOTS of storage as it writes all output!') #FIXME

-arg_parser.add_argument('-pdb', '--pdb_file', help = 'PDB File to process. By default, it assmumes a file called <gene>_complex.pdb in input_dir')
+arg_parser.add_argument('-P', '--pdb_file', help = 'PDB File to process. By default, it assmumes a file called <gene>_complex.pdb in input_dir')
 arg_parser.add_argument('-m', '--mutation_file', help = 'Mutation list. By default, assumes a file called <gene>_mcsm_snps.csv exists')

+# FIXME: Doesn't work with 2 chains yet!
 arg_parser.add_argument('-c1', '--chain1',    help = 'Chain1 ID', default = 'A') # case sensitive
 arg_parser.add_argument('-c2', '--chain2',    help = 'Chain2 ID', default = 'B') # case sensitive

@ -64,11 +68,20 @@ chainA       = args.chain1
 chainB        = args.chain2
 pdb_filename  = args.pdb_file

+
 # os.path.splitext will fail interestingly with file.pdb.txt.zip
 #pdb_name = os.path.splitext(pdb_file)[0]
 # Just the filename, thanks
 #pdb_name = Path(in_filename_pdb).stem

+
+# Handle the case where neither 'drug' 
+# nor (indir,outdir,process_dir) are defined
+if not drug:
+    if not indir or not outdir or not process_dir:
+        print('ERROR: if "drug" is not specified, you must specify Input, Output, and Process directories')
+        sys.exit()
+
 #==============
 # directories
 #==============
@ -83,27 +96,37 @@ if not outdir:

 #TODO: perhaps better handled by refactoring code to prevent generating lots of output files!
 if not process_dir:
-    process_dir = datadir + '/' + drug +'/' + 'processing'
+    process_dir = datadir + '/' + drug + '/processing'
+
+# Make all paths absolute in case the user forgot
+indir = os.path.abspath(indir)
+process_dir = os.path.abspath(process_dir)
+outdir = os.path.abspath(outdir)
+datadir = os.path.abspath(datadir)

 #=======
 # input
 #=======
 # FIXME
 if pdb_filename:
+    pdb_filename = os.path.abspath(pdb_filename)
    pdb_name = Path(pdb_filename).stem
+    infile_pdb = pdb_filename 
 else:
    pdb_filename = gene.lower() + '_complex.pdb'
    pdb_name = Path(pdb_filename).stem
-
    infile_pdb = indir + '/' + pdb_filename
+    
 actual_pdb_filename = Path(infile_pdb).name

 if mut_filename:
-    mutation_file = mut_filename
+    mutation_file = os.path.abspath(mut_filename)
+    infile_muts = mutation_file
+    print('User-provided mutation file in use:', infile_muts)
 else:
-    mutation_file =  gene.lower() + '_mcsm_snps.csv'
-
+    mutation_file =  gene.lower() + '_mcsm_formatted_snps.csv'
    infile_muts = outdir + '/' + mutation_file
+    print('WARNING: Assuming default mutation file:', infile_muts)

 #=======
 # output 
@ -115,6 +138,7 @@ print('Arguments being passed:'
 , '\nDrug:', args.drug
 , '\ngene:', args.gene
 , '\ninput dir:', indir
+, '\nprocess dir:', process_dir
 , '\noutput dir:', outdir
 , '\npdb file:', infile_pdb
 , '\npdb name:', pdb_name
@ -123,6 +147,10 @@ print('Arguments being passed:'
 , '\nchain1:', args.chain1
 , '\noutput file:', outfile_foldx
 , '\n=============================================================')
+
+#### Delay for 10 seconds to check the params ####
+print('Sleeping for 10 seconds to give you time to cancel')
+time.sleep(10)
 #=======================================================================

 def getInteractionEnergy(filename):
@ -183,6 +211,19 @@ def loadFiles(df):
    f.close()
    return np.asarray(resultList, dtype=np.float32)

+# TODO: put the subprocess call in a 'def'
+#def repairPDB():
+#    subprocess.call(['foldx' 
+#    , '--command=RepairPDB'
+#    , '--pdb-dir=' + indir
+#    ,  '--pdb=' + actual_pdb_filename 
+#    , '--ionStrength=0.05'#
+#   , '--pH=7'
+#    , '--water=PREDICT'
+#    , '--vdwDesign=1'
+#    , 'outPDB=true'
+#    , '--output-dir=' + process_dir])
+
 #=======================================================================    
 def main():
    pdbname = pdb_name
@ -194,37 +235,128 @@ def main():
    nmuts = len(mutlist)
    print(nmuts)
    print(mutlist)
+    
    print('start')  
-    #subprocess.check_output(['bash','repairPDB.sh', pdbname, process_dir])
-    subprocess.check_output(['bash','repairPDB.sh', indir, actual_pdb_filename, process_dir])
+    # some common parameters for foldX
+    foldx_common=' --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 '
    
-    print('end')
-    output = subprocess.check_output(['bash', 'runfoldx.sh', pdbname, process_dir])
+    print('\033[95mSTAGE: repair PDB (foldx subprocess) \033[0m')
+    print('Running foldx RepairPDB for WT')
+    subprocess.call(['foldx' 
+    , '--command=RepairPDB'
+    , foldx_common
+    , '--pdb-dir=' + os.path.dirname(pdb_filename)
+    ,  '--pdb=' + actual_pdb_filename 
+    , 'outPDB=true'
+    , '--output-dir=' + process_dir])
+    print('\033[95mCOMPLETED STAGE: repair PDB\033[0m')
+    print('\n==========================================================')
    
+    
+    print('\033[95mSTAGE: Foldx commands BM, PN and SD (foldx subprocess) for WT\033[0m')
+    print('Running foldx BuildModel for WT')
+    subprocess.call(['foldx' 
+    , '--command=BuildModel'
+    , foldx_common
+    , '--pdb-dir=' + process_dir
+    ,  '--pdb=' + pdbname + '_Repair.pdb'
+    , '--mutant-file="individual_list_' + pdbname +'.txt"'
+    , 'outPDB=true'
+    , '--numberOfRuns=1'
+    , '--output-dir=' + process_dir], cwd=process_dir)
+
+    print('Running foldx PrintNetworks for WT')
+    subprocess.call(['foldx' 
+    , '--command=PrintNetworks'
+    , '--pdb-dir=' + process_dir
+    ,  '--pdb=' + pdbname + '_Repair.pdb'
+    , '--water=PREDICT'
+    , '--vdwDesign=1'
+    , '--output-dir=' + process_dir], cwd=process_dir)
+
+    print('Running foldx SequenceDetail for WT')
+    subprocess.call(['foldx' 
+    , '--command=SequenceDetail'
+    , '--pdb-dir=' + process_dir
+    ,  '--pdb=' + pdbname + '_Repair.pdb'
+    , '--water=PREDICT'
+    , '--vdwDesign=1'
+    , '--output-dir=' + process_dir], cwd=process_dir)
+    print('\033[95mCOMPLETED STAGE: Foldx commands BM, PN and SD\033[0m')
+    print('\n==========================================================')
+    
+    
+    print('\033[95mSTAGE: Print Networks (foldx subprocess) for MT\033[0m')
    for n in range(1,nmuts+1):
-        print(n)
-        with suppress(Exception):
-            subprocess.check_output(['bash', 'runPrintNetworks.sh', pdbname, str(n), process_dir])
+        print('\033[95mNETWORK:\033[0m', n)
+        print('Running foldx PrintNetworks for mutation', n)
+        subprocess.call(['foldx' 
+        , '--command=PrintNetworks'
+        , '--pdb-dir=' + process_dir
+        ,  '--pdb=' + pdbname + '_Repair_' + str(n) + '.pdb'
+        , '--water=PREDICT'
+        , '--vdwDesign=1'
+        , '--output-dir=' + process_dir], cwd=process_dir) 
+    print('\033[95mCOMPLETED STAGE: Print Networks (foldx subprocess) for MT\033[0m')
+    print('\n==========================================================')
    
+    
+    print('\033[95mSTAGE: Rename Mutation Files (shell)\033[0m')
    for n in range(1,nmuts+1):
-        print(n)
+        print('\033[95mMUTATION:\033[0m', n)
+        print('\033[96mCommand:\033[0m mutrenamefiles.sh %s %s %s' % (pdbname, str(n), process_dir ))    
+       #FIXME: bad design and needs to be done in a pythonic way
        with suppress(Exception):
            subprocess.check_output(['bash', 'mutrenamefiles.sh', pdbname, str(n), process_dir])
+    print('\033[95mCOMPLETED STAGE: Rename Mutation Files (shell)\033[0m')
+    print('\n==========================================================')
    
+                
+    print('\033[95mSTAGE: Rename Files (shell) for WT\033[0m')
+    # FIXME: this is bad design and needs to be done in a pythonic way
    out = subprocess.check_output(['bash','renamefiles.sh', pdbname, process_dir])
+    print('\033[95mCOMPLETED STAGE: Rename Files (shell) for WT\033[0m')
+    print('\n==========================================================')
+    
    
    if comp=='y':
+        print('\033[95mSTAGE: Running foldx AnalyseComplex (foldx subprocess) for WT\033[0m')
        chain1=chainA
        chain2=chainB
-        with suppress(Exception):
-            subprocess.check_output(['bash','runcomplex.sh', pdbname, chain1, chain2, process_dir])
-        for n in range(1,nmuts+1):
-            with suppress(Exception):
-                subprocess.check_output(['bash','mutruncomplex.sh', pdbname, chain1, chain2, str(n), process_dir])
+        subprocess.call(['foldx' 
+        , '--command=AnalyseComplex'
+        , '--pdb-dir=' + process_dir
+        ,  '--pdb=' + pdbname + '_Repair.pdb'
+        , '--analyseComplexChains=' + chain1 + ',' + chain2
+        , '--water=PREDICT'
+        , '--vdwDesign=1'
+        , '--output-dir=' + process_dir], cwd=process_dir)

-    interactions = ['Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS',
-                    'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM',
-                    'VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS']
+        # FIXME why would we ever need to do this?!? Cargo-culted from runcomplex.sh
+        ac_source = process_dir + '/Summary_' + pdbname + '_Repair_AC.fxout'
+        ac_dest = process_dir + '/Summary_' + pdbname + '_Repair_AC.txt'
+        shutil.copyfile(ac_source, ac_dest)
+        print('\033[95mCOMPLETED STAGE: foldx AnalyseComplex (subprocess) for WT:\033[0m', n)
+
+        for n in range(1,nmuts+1):
+            print('\033[95mSTAGE: Running foldx AnalyseComplex (foldx subprocess) for mutation:\033[0m', n)
+            subprocess.call(['foldx' 
+            , '--command=AnalyseComplex'
+            , '--pdb-dir=' + process_dir
+            ,  '--pdb=' + pdbname + '_Repair_' + str(n) + '.pdb'
+            , '--analyseComplexChains=' + chain1 + ',' + chain2
+            , '--water=PREDICT'
+            , '--vdwDesign=1'
+            , '--output-dir=' + process_dir], cwd=process_dir)
+
+            # FIXME why would we ever need to do this?!? Cargo-culted from runcomplex.sh
+            ac_mut_source = process_dir + '/Summary_' + pdbname + '_Repair_' + str(n) +'_AC.fxout'
+            ac_mut_dest = process_dir + '/Summary_' + pdbname + '_Repair)' + str(n) +'_AC.txt'
+            shutil.copyfile(ac_mut_source, ac_mut_dest)
+        print('\033[95mCOMPLETED STAGE: foldx AnalyseComplex (subprocess) for mutation:\033[0m', n)
+        print('\n==========================================================')
+        
+    interactions = ['Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS', 'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM','VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS']
    
    dGdatafile = process_dir + '/Dif_' + pdbname + '_Repair.txt'
    dGdata = pd.read_csv(dGdatafile, sep = '\t')
@ -266,8 +398,7 @@ def main():
        print(d)
        data[i+1] = d    
        
-    interactions = ['ddG', 'Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS',              'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM',
-'VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS']   
+    interactions = ['ddG', 'Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS', 'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM','VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS']   

    print(interactions)

@ -291,8 +422,7 @@ def main():
        print(len(IE))
        data = np.append(data,[IE], axis = 0)
        print(data)
-        interactions = ['ddG','Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS',                'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM',
-'VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS','Interaction Energy']  
+        interactions = ['ddG','Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS','Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM','VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS','Interaction Energy']  

    mut_file = process_dir + '/individual_list_' + pdbname + '.txt'
    with open(mut_file) as csvfile:
@ -330,6 +460,7 @@ def main():
    #outputfilename = 'foldx_results_' + pdbname + '.csv'
    #results.to_csv(outputfilename)
    results2.to_csv(outputfilename, index = False)
+    print ('end')
    
 if __name__ == '__main__':
    main()
--- a/foldx/runFoldx5.py
+++ b/foldx/runFoldx5.py
@ -0,0 +1,466 @@
+#!/usr/bin/env python3
+import subprocess
+import os
+import sys
+import numpy as np
+import pandas as pd
+from contextlib import suppress
+from pathlib import Path
+import re
+import csv
+import argparse
+import shutil
+import time
+#https://realpython.com/python-pathlib/
+
+# FIXME
+#strong dependency of file and path names
+#cannot pass file with path. Need to pass them separately
+#assumptions made for dir struc as standard
+#datadir + drug + input
+
+#=======================================================================
+#%% specify input and curr dir
+# Home directory of the current user; base of the default data directory.
+homedir = os.path.expanduser('~')
+
+# The working directory is deliberately left unchanged. The helper scripts
+# mutrenamefiles.sh and renamefiles.sh are invoked by bare name from main(),
+# so the script has to be started from the directory that contains them.
+
+#=======================================================================
+#%% command line args
+# Command line interface. No option is mandatory at the parser level; the
+# combinations that are actually needed are checked after parsing.
+arg_parser = argparse.ArgumentParser()
+
+arg_parser.add_argument('-d', '--drug',     help = 'drug name', default = None)
+arg_parser.add_argument('-g', '--gene',     help = 'gene name (case sensitive)', default = None)
+
+arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data')
+# The help texts of the three directories and of the mutation file state the
+# defaults the script really applies: <datadir>/<drug>/{input,output,processing}
+# and <gene>_mcsm_formatted_snps.csv inside the output dir.
+arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assumes <datadir>/<drug>/input')
+arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assumes <datadir>/<drug>/output')
+arg_parser.add_argument('-p', '--process_dir', help = 'Temp processing dir for running foldX. By default, it assumes <datadir>/<drug>/processing. Make sure it is somewhere with LOTS of storage as it writes all output!') #FIXME
+
+arg_parser.add_argument('-P', '--pdb_file', help = 'PDB File to process. By default, it assmumes a file called <gene>_complex.pdb in input_dir')
+arg_parser.add_argument('-m', '--mutation_file', help = 'Mutation list. By default, it assumes a file called <gene>_mcsm_formatted_snps.csv in output_dir')
+
+# FIXME: Doesn't work with 2 chains yet!
+arg_parser.add_argument('-c1', '--chain1',    help = 'Chain1 ID', default = 'A') # case sensitive
+arg_parser.add_argument('-c2', '--chain2',    help = 'Chain2 ID', default = 'B') # case sensitive
+
+# Parse the process command line (sys.argv) into a namespace.
+args = arg_parser.parse_args()
+#=======================================================================
+#%% variable assignment: input and output
+#drug = 'pyrazinamide'
+#gene = 'pncA'
+#gene_match = gene + '_p.'
+#%%=====================================================================
+# Command line options
+# Copy the parsed options into module-level names used by the rest of the
+# script; each stays None when its option was not given.
+drug          = args.drug
+gene          = args.gene
+
+datadir       = args.datadir
+indir         = args.input_dir
+outdir        = args.output_dir
+process_dir   = args.process_dir
+
+mut_filename  = args.mutation_file
+# chain IDs default to 'A' and 'B' in the parser
+chainA        = args.chain1
+chainB        = args.chain2
+pdb_filename  = args.pdb_file
+
+
+# os.path.splitext will fail interestingly with file.pdb.txt.zip
+#pdb_name = os.path.splitext(pdb_file)[0]
+# Just the filename, thanks
+#pdb_name = Path(in_filename_pdb).stem
+
+
+# 'gene' is needed unconditionally: the output file name is built from
+# gene.lower(). Stop here with a clear message rather than failing later
+# with an AttributeError on None.
+if not gene:
+    sys.exit('ERROR: "gene" must be specified (-g/--gene)')
+
+# Handle the case where neither 'drug'
+# nor (indir,outdir,process_dir) are defined: the default directories are
+# derived from 'drug', so without it all three must be given explicitly.
+if not drug and not (indir and outdir and process_dir):
+    # sys.exit(<message>) writes the message to stderr and exits with
+    # status 1; a bare sys.exit() would report success (status 0).
+    sys.exit('ERROR: if "drug" is not specified, you must specify Input, Output, and Process directories')
+
+#==============
+# directories
+#==============
+# Fill in every directory that was not supplied, falling back to
+# <homedir>/git/Data for the data dir and <datadir>/<drug>/<kind> for the rest.
+datadir = datadir or '/'.join((homedir, 'git/Data'))
+indir = indir or '/'.join((datadir, drug, 'input'))
+outdir = outdir or '/'.join((datadir, drug, 'output'))
+
+#TODO: perhaps better handled by refactoring code to prevent generating lots of output files!
+process_dir = process_dir or '/'.join((datadir, drug, 'processing'))
+
+# Make all paths absolute in case the user forgot
+indir, process_dir, outdir, datadir = map(
+    os.path.abspath, (indir, process_dir, outdir, datadir))
+
+#=======
+# input
+#=======
+# Resolve the PDB structure to process. After this section pdb_filename and
+# infile_pdb both hold the full path of the PDB file, pdb_name its stem and
+# actual_pdb_filename its bare file name.
+if pdb_filename:
+    pdb_filename = os.path.abspath(pdb_filename)
+    infile_pdb = pdb_filename
+else:
+    # default: <gene>_complex.pdb (gene lower-cased) inside the input dir
+    infile_pdb = indir + '/' + gene.lower() + '_complex.pdb'
+    # Keep the full path in pdb_filename, as the user-supplied branch does.
+    # main() passes os.path.dirname(pdb_filename) to foldx as --pdb-dir, and
+    # that was an empty string while only the bare file name was stored here.
+    pdb_filename = infile_pdb
+
+pdb_name = Path(pdb_filename).stem
+actual_pdb_filename = Path(infile_pdb).name
+
+# Resolve the mutation list: a user-supplied file is made absolute, otherwise
+# <gene>_mcsm_formatted_snps.csv is expected in the output dir.
+if mut_filename:
+    mutation_file = os.path.abspath(mut_filename)
+    infile_muts = mutation_file
+    print('User-provided mutation file in use:', infile_muts)
+else:
+    mutation_file =  gene.lower() + '_mcsm_formatted_snps.csv'
+    infile_muts = outdir + '/' + mutation_file
+    print('WARNING: Assuming default mutation file:', infile_muts)
+
+#=======
+# output
+#=======
+# Results file: <outdir>/<gene lower-cased>_foldx.csv
+out_filename = gene.lower() + '_foldx.csv'
+outfile_foldx =  outdir + '/' + out_filename
+
+# Echo the resolved parameters so they can be checked before the run starts.
+# NOTE(review): chain1 is printed but chain2 is not - confirm whether that is
+# intended.
+print('Arguments being passed:'
+, '\nDrug:', args.drug
+, '\ngene:', args.gene
+, '\ninput dir:', indir
+, '\nprocess dir:', process_dir
+, '\noutput dir:', outdir
+, '\npdb file:', infile_pdb
+, '\npdb name:', pdb_name
+, '\nactual pdb name:', actual_pdb_filename
+, '\nmutation file:', infile_muts
+, '\nchain1:', args.chain1
+, '\noutput file:', outfile_foldx
+, '\n=============================================================')
+
+#### Delay for 10 seconds to check the params ####
+# This runs at module level, i.e. on every start of the script, before main().
+print('Sleeping for 10 seconds to give you time to cancel')
+time.sleep(10)
+#=======================================================================
+
+def getInteractionEnergy(filename):
+    """Return the 'Interaction Energy' value at index label 0 of a tab-separated file.
+
+    filename is read with pandas using its first line as the header; the file
+    must contain a column named 'Interaction Energy'.
+    """
+    table = pd.read_csv(filename, sep = '\t')
+    energy_column = table['Interaction Energy']
+    return energy_column.loc[0]
+
+def getInteractions(filename):
+    """Count the cells equal to 1 in a tab-separated matrix file.
+
+    The first line of filename is used as the column header and its first
+    column as the row index; the count is the number of (row, column)
+    positions that getIndexes reports for the value 1.
+    """
+    matrix = pd.read_csv(filename, sep = '\t', header = 0, index_col = 0)
+    return len(getIndexes(matrix, 1))
+
+def formatMuts(mut_file,pdbname):
+    """Write the mutant list file handed to foldx and return its entries.
+
+    The first field of every row of the CSV file mut_file is taken as one
+    mutation. The module-level chain ID chainA is inserted after its first
+    character and the result, terminated by ';', is written one per line to
+    <process_dir>/individual_list_<pdbname>.txt (process_dir is module-level
+    too). Each mutation is echoed to stdout before and after the rewrite.
+
+    Rows without any field (blank lines) or with an empty first field are
+    skipped; they used to raise IndexError or produce an entry holding only
+    the chain ID.
+
+    Returns the list of chain-annotated mutations without the trailing ';'.
+    """
+    with open(mut_file) as csvfile:
+        muts = [row[0] for row in csv.reader(csvfile) if row and row[0]]
+
+    mut_list = []
+    outfile = process_dir + '/individual_list_' + pdbname + '.txt'
+    with open(outfile, 'w') as output:
+        for m in muts:
+            print(m)
+            # e.g. chain 'A' turns 'S104R' into 'SA104R'
+            mut = m[:1] + chainA + m[1:]
+            mut_list.append(mut)
+            print(mut + ';')
+            output.write(mut + ';\n')
+    return mut_list
+
+def getIndexes(data, value):
+    """Return the (row label, column label) positions where data equals value.
+
+    Positions are listed column by column in the DataFrame's column order
+    and, within a column, in row order. An empty list is returned when value
+    does not occur.
+    """
+    hits = data.isin([value])
+    hits.columns = data.columns.values
+    column_has_hit = hits.any()
+
+    positions = []
+    for col in column_has_hit[column_has_hit].index:
+        column_hits = hits[col]
+        positions.extend((row, col) for row in column_hits[column_hits].index)
+    return positions
+
+def loadFiles(df):
+    """Load a tab-separated text file of numbers into a float32 numpy array.
+
+    df is the path of the file. Every line becomes one row: it is split on
+    tabs and each field is converted to np.float32, so a field that is not a
+    number (including an empty one) raises ValueError.
+
+    Returns np.asarray of the rows with dtype float32.
+    """
+    resultList = []
+    # 'with' closes the file even when a conversion fails
+    with open(df, 'r') as f:
+        for line in f:
+            # convert the fields that were just split off; the previous code
+            # read an undefined name here and failed with NameError
+            fields = line.rstrip('\n').split('\t')
+            resultList.append(list(map(np.float32, fields)))
+    return np.asarray(resultList, dtype=np.float32)
+
+# TODO: put the subprocess call in a 'def'
+#def repairPDB():
+#    subprocess.call(['foldx' 
+#    , '--command=RepairPDB'
+#    , '--pdb-dir=' + indir
+#    ,  '--pdb=' + actual_pdb_filename 
+#    , '--ionStrength=0.05'#
+#   , '--pH=7'
+#    , '--water=PREDICT'
+#    , '--vdwDesign=1'
+#    , 'outPDB=true'
+#    , '--output-dir=' + process_dir])
+
+#=======================================================================    
+def main():
+    pdbname = pdb_name
+    comp = '' # for complex only
+    mut_filename = infile_muts #pnca_mcsm_snps.csv
+    mutlist = formatMuts(mut_filename, pdbname)
+
+    print(mutlist)
+    nmuts = len(mutlist)
+    print(nmuts)
+    print(mutlist)
+    
+    print('start')  
+    # some common parameters for foldX
+    foldx_common=' --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 '
+    
+    print('\033[95mSTAGE: repair PDB (foldx subprocess) \033[0m')
+    print('Running foldx RepairPDB for WT')
+    subprocess.call(['foldx5' 
+    , '--command=RepairPDB'
+    , foldx_common
+    , '--pdb-dir=' + os.path.dirname(pdb_filename)
+    ,  '--pdb=' + actual_pdb_filename 
+    , 'outPDB=true'
+    , '--output-dir=' + process_dir])
+    print('\033[95mCOMPLETED STAGE: repair PDB\033[0m')
+    print('\n==========================================================')
+    
+    
+    print('\033[95mSTAGE: Foldx commands BM, PN and SD (foldx subprocess) for WT\033[0m')
+    print('Running foldx BuildModel for WT')
+    subprocess.call(['foldx5' 
+    , '--command=BuildModel'
+    , foldx_common
+    , '--pdb-dir=' + process_dir
+    ,  '--pdb=' + pdbname + '_Repair.pdb'
+    , '--mutant-file="individual_list_' + pdbname +'.txt"'
+    , 'outPDB=true'
+    , '--numberOfRuns=1'
+    , '--output-dir=' + process_dir], cwd=process_dir)
+
+    print('Running foldx PrintNetworks for WT')
+    subprocess.call(['foldx5' 
+    , '--command=PrintNetworks'
+    , '--pdb-dir=' + process_dir
+    ,  '--pdb=' + pdbname + '_Repair.pdb'
+    , '--water=PREDICT'
+    , '--vdwDesign=1'
+    , '--output-dir=' + process_dir], cwd=process_dir)
+
+    print('Running foldx SequenceDetail for WT')
+    subprocess.call(['foldx5' 
+    , '--command=SequenceDetail'
+    , '--pdb-dir=' + process_dir
+    ,  '--pdb=' + pdbname + '_Repair.pdb'
+    , '--water=PREDICT'
+    , '--vdwDesign=1'
+    , '--output-dir=' + process_dir], cwd=process_dir)
+    print('\033[95mCOMPLETED STAGE: Foldx commands BM, PN and SD\033[0m')
+    print('\n==========================================================')
+    
+    
+    print('\033[95mSTAGE: Print Networks (foldx subprocess) for MT\033[0m')
+    for n in range(1,nmuts+1):
+        print('\033[95mNETWORK:\033[0m', n)
+        print('Running foldx PrintNetworks for mutation', n)
+        subprocess.call(['foldx5' 
+        , '--command=PrintNetworks'
+        , '--pdb-dir=' + process_dir
+        ,  '--pdb=' + pdbname + '_Repair_' + str(n) + '.pdb'
+        , '--water=PREDICT'
+        , '--vdwDesign=1'
+        , '--output-dir=' + process_dir], cwd=process_dir) 
+    print('\033[95mCOMPLETED STAGE: Print Networks (foldx subprocess) for MT\033[0m')
+    print('\n==========================================================')
+    
+    
+    print('\033[95mSTAGE: Rename Mutation Files (shell)\033[0m')
+    for n in range(1,nmuts+1):
+        print('\033[95mMUTATION:\033[0m', n)
+        print('\033[96mCommand:\033[0m mutrenamefiles.sh %s %s %s' % (pdbname, str(n), process_dir ))    
+       #FIXME: bad design and needs to be done in a pythonic way
+        with suppress(Exception):
+            subprocess.check_output(['bash', 'mutrenamefiles.sh', pdbname, str(n), process_dir])
+    print('\033[95mCOMPLETED STAGE: Rename Mutation Files (shell)\033[0m')
+    print('\n==========================================================')
+    
+                
+    print('\033[95mSTAGE: Rename Files (shell) for WT\033[0m')
+    # FIXME: this is bad design and needs to be done in a pythonic way
+    out = subprocess.check_output(['bash','renamefiles.sh', pdbname, process_dir])
+    print('\033[95mCOMPLETED STAGE: Rename Files (shell) for WT\033[0m')
+    print('\n==========================================================')
+    
+    
+    if comp=='y':
+        print('\033[95mSTAGE: Running foldx AnalyseComplex (foldx subprocess) for WT\033[0m')
+        chain1=chainA
+        chain2=chainB
+        subprocess.call(['foldx5' 
+        , '--command=AnalyseComplex'
+        , '--pdb-dir=' + process_dir
+        ,  '--pdb=' + pdbname + '_Repair.pdb'
+        , '--analyseComplexChains=' + chain1 + ',' + chain2
+        , '--water=PREDICT'
+        , '--vdwDesign=1'
+        , '--output-dir=' + process_dir], cwd=process_dir)
+
+        # FIXME why would we ever need to do this?!? Cargo-culted from runcomplex.sh
+        ac_source = process_dir + '/Summary_' + pdbname + '_Repair_AC.fxout'
+        ac_dest = process_dir + '/Summary_' + pdbname + '_Repair_AC.txt'
+        shutil.copyfile(ac_source, ac_dest)
+        print('\033[95mCOMPLETED STAGE: foldx AnalyseComplex (subprocess) for WT:\033[0m', n)
+
+        for n in range(1,nmuts+1):
+            print('\033[95mSTAGE: Running foldx AnalyseComplex (foldx subprocess) for mutation:\033[0m', n)
+            subprocess.call(['foldx5' 
+            , '--command=AnalyseComplex'
+            , '--pdb-dir=' + process_dir
+            ,  '--pdb=' + pdbname + '_Repair_' + str(n) + '.pdb'
+            , '--analyseComplexChains=' + chain1 + ',' + chain2
+            , '--water=PREDICT'
+            , '--vdwDesign=1'
+            , '--output-dir=' + process_dir], cwd=process_dir)
+
+            # FIXME why would we ever need to do this?!? Cargo-culted from runcomplex.sh
+            ac_mut_source = process_dir + '/Summary_' + pdbname + '_Repair_' + str(n) +'_AC.fxout'
+            ac_mut_dest = process_dir + '/Summary_' + pdbname + '_Repair)' + str(n) +'_AC.txt'
+            shutil.copyfile(ac_mut_source, ac_mut_dest)
+        print('\033[95mCOMPLETED STAGE: foldx AnalyseComplex (subprocess) for mutation:\033[0m', n)
+        print('\n==========================================================')
+        
+    interactions = ['Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS', 'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM','VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS']
+    
+    dGdatafile = process_dir + '/Dif_' + pdbname + '_Repair.txt'
+    dGdata = pd.read_csv(dGdatafile, sep = '\t')
+    
+    ddG=[]
+    print('ddG')
+    print(len(dGdata))
+    for i in range(0,len(dGdata)):
+        ddG.append(dGdata['total energy'].loc[i])
+    
+
+    nint = len(interactions)
+    wt_int = []
+
+    for i in interactions:
+        filename = process_dir + '/Matrix_' + i + '_'+ pdbname + '_Repair_PN.txt'
+        wt_int.append(getInteractions(filename))
+    print('wt')
+    print(wt_int)
+    
+    ntotal = nint+1
+    print(ntotal)
+    print(nmuts)
+    data = np.empty((ntotal,nmuts))
+    data[0] = ddG
+    print(data)
+    for i in range(0,len(interactions)):
+        d=[]
+        p=0
+        for n in range(1, nmuts+1):
+            print(i)
+            filename = process_dir + '/Matrix_' + interactions[i] + '_' + pdbname + '_Repair_' + str(n) + '_PN.txt'
+            mut = getInteractions(filename)
+            diff = wt_int[i] - mut
+            print(diff)
+            print(wt_int[i])
+            print(mut)
+            d.append(diff)
+        print(d)
+        data[i+1] = d    
+        
+    interactions = ['ddG', 'Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS', 'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM','VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS']   
+
+    print(interactions)
+
+    IE = []
+    if comp=='y':
+        wtfilename = process_dir + '/Summary_' + pdbname + '_Repair_AC.txt'
+        wtE = getInteractionEnergy(wtfilename)
+        print(wtE)
+        for n in range(1,nmuts+1):
+            print(n)
+            filename = process_dir + '/Summary_' + pdbname + '_Repair_' + str(n) + '_AC.txt'
+            mutE = getInteractionEnergy(filename)
+            print(mutE)
+            diff = wtE - mutE
+            print(diff)
+            IE.append(diff)
+        print(IE)
+        IEresults = pd.DataFrame(IE,columns = ['Interaction Energy'], index = mutlist)
+        IEfilename = 'foldx_complexresults_'+pdbname+'.csv'
+        IEresults.to_csv(IEfilename)
+        print(len(IE))
+        data = np.append(data,[IE], axis = 0)
+        print(data)
+        interactions = ['ddG','Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS','Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM','VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS','Interaction Energy']  
+
+    mut_file = process_dir + '/individual_list_' + pdbname + '.txt'
+    with open(mut_file) as csvfile:
+        readCSV = csv.reader(csvfile)
+        mutlist = []
+        for row in readCSV:
+                mut = row[0]
+                mutlist.append(mut)
+    print(mutlist)
+    print(len(mutlist))
+    print(data)
+    results = pd.DataFrame(data, columns = mutlist, index = interactions)
+    results.append(ddG)
+    #print(results.head())
+    
+    # my style formatted results
+    results2 = results.T # transpose df
+    results2.index.name = 'mutationinformation' # assign name to index
+    results2 = results2.reset_index() # turn it into a columns
+  
+    results2['mutationinformation'] = results2['mutationinformation'].replace({r'([A-Z]{1})[A-Z]{1}([0-9]+[A-Z]{1});' : r'\1 \2'}, regex = True) # capture mcsm style muts (i.e not the chain id)
+    results2['mutationinformation'] = results2['mutationinformation'].str.replace(' ', '') # remove empty space
+    
+    results2.rename(columns = {'Distances': 'Contacts'}, inplace = True)
+        
+    # lower case columns
+    results2.columns = results2.columns.str.lower()
+    
+    print('Writing file in the format below:\n'
+        , results2.head()
+        , '\nNo. of rows:', len(results2)
+        , '\nNo. of cols:', len(results2.columns))
+    
+    outputfilename = outfile_foldx   
+    #outputfilename = 'foldx_results_' + pdbname + '.csv'
+    #results.to_csv(outputfilename)
+    results2.to_csv(outputfilename, index = False)
+    print ('end')
+    
+# Entry point: call main() only when this file is executed as a script.
+if __name__ == '__main__':
+    main()
--- a/foldx/test2/deprecated_shell/mutruncomplex.sh
+++ b/foldx/test2/deprecated_shell/mutruncomplex.sh
--- a/foldx/test2/deprecated_shell/repairPDB.sh
+++ b/foldx/test2/deprecated_shell/repairPDB.sh
@ -1,7 +1,7 @@
 INDIR=$1
 PDB=$2
 OUTDIR=$3
-
+cd ${OUTDIR}
 logger "Running repairPDB"

 #foldx --command=RepairPDB --pdb="${PDB}.pdb" --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 outPDB=true --output-dir=${OUTDIR}
--- a/foldx/test2/deprecated_shell/runPrintNetworks.sh
+++ b/foldx/test2/deprecated_shell/runPrintNetworks.sh
--- a/foldx/test2/deprecated_shell/runcomplex.sh
+++ b/foldx/test2/deprecated_shell/runcomplex.sh
@ -7,4 +7,3 @@ logger "Running runcomplex"
 foldx --command=AnalyseComplex --pdb="${PDB}_Repair.pdb" --analyseComplexChains=${A},${B} --water=PREDICT --vdwDesign=1 --output-dir=${OUTDIR}
 cp ${OUTDIR}/Summary_${PDB}_Repair_AC.fxout ${OUTDIR}/Summary_${PDB}_Repair_AC.txt
 #sed -i .bak -e 1,8d ${OUTDIR}/Summary_${PDB}_Repair_AC.txt 
-
--- a/foldx/test2/deprecated_shell/runfoldx.sh
+++ b/foldx/test2/deprecated_shell/runfoldx.sh
@ -2,7 +2,7 @@ PDB=$1
 OUTDIR=$2
 cd ${OUTDIR}
 pwd
-ls
+ls -l
 logger "Running runfoldx"
 foldx --command=BuildModel --pdb="${PDB}_Repair.pdb" --mutant-file="individual_list_${PDB}.txt" --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 --out-pdb=true --numberOfRuns=1 --output-dir=${OUTDIR}
 foldx --command=PrintNetworks --pdb="${PDB}_Repair.pdb" --water=PREDICT --vdwDesign=1 --output-dir=${OUTDIR}
--- a/foldx/test2/mutrenamefiles.sh
+++ b/foldx/test2/mutrenamefiles.sh
@ -1,14 +1,15 @@
 PDB=$1
 n=$2
-#cd /home/tanu/git/LSHTM_analysis/foldx/
-logger "Running mutrenamefiles_mac"
+OUTDIR=$3
+cd ${OUTDIR}
+#cd /home/git/LSHTM_analysis/foldx/test2
 cp Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout Matrix_Hbonds_${PDB}_Repair_${n}_PN.txt
 sed -n '5,190p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_RR_${PDB}_Repair_${n}_PN.txt
 sed -n '194,379p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_MM_${PDB}_Repair_${n}_PN.txt
 sed -n '383,568p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_SM_${PDB}_Repair_${n}_PN.txt
 sed -n '572,757p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_SS_${PDB}_Repair_${n}_PN.txt
 cp Matrix_Distances_${PDB}_Repair_${n}_PN.fxout Matrix_Distances_${PDB}_Repair_${n}_PN.txt
-sed -i .bak -e 1,4d Matrix_Distances_${PDB}_Repair_${n}_PN.txt
+sed -i '1,4d' Matrix_Distances_${PDB}_Repair_${n}_PN.txt
 cp Matrix_Volumetric_${PDB}_Repair_${n}_PN.fxout Matrix_Volumetric_${PDB}_Repair_${n}_PN.txt
 sed -n '5,190p' Matrix_Volumetric_${PDB}_Repair_${n}_PN.fxout > Matrix_Volumetric_RR_${PDB}_Repair_${n}_PN.txt
 sed -n '194,379p' Matrix_Volumetric_${PDB}_Repair_${n}_PN.fxout > Matrix_Volumetric_MM_${PDB}_Repair_${n}_PN.txt
@ -35,34 +36,28 @@ sed -n '194,379p' Matrix_VdWClashes_${PDB}_Repair_${n}_PN.fxout > Matrix_VdWClas
 sed -n '383,568p' Matrix_VdWClashes_${PDB}_Repair_${n}_PN.fxout > Matrix_VdWClashes_SM_${PDB}_Repair_${n}_PN.txt
 sed -n '572,757p' Matrix_VdWClashes_${PDB}_Repair_${n}_PN.fxout > Matrix_VdWClashes_SS_${PDB}_Repair_${n}_PN.txt
 cp AllAtoms_Disulfide_${PDB}_Repair_${n}_PN.fxout AllAtoms_Disulfide_${PDB}_Repair_${n}_PN.txt
-sed -i .bak -e 1,2d AllAtoms_Disulfide_${PDB}_Repair_${n}_PN.txt
+sed -i '1,2d' AllAtoms_Disulfide_${PDB}_Repair_${n}_PN.txt
 cp AllAtoms_Electro_${PDB}_Repair_${n}_PN.fxout AllAtoms_Electro_${PDB}_Repair_${n}_PN.txt
-sed -i .bak -e 1,2d AllAtoms_Electro_${PDB}_Repair_${n}_PN.txt
+sed -i '1,2d' AllAtoms_Electro_${PDB}_Repair_${n}_PN.txt
 cp AllAtoms_Hbonds_${PDB}_Repair_${n}_PN.fxout AllAtoms_Hbonds_${PDB}_Repair_${n}_PN.txt
-sed -i .bak -e 1,2d AllAtoms_Hbonds_${PDB}_Repair_${n}_PN.txt
+sed -i '1,2d' AllAtoms_Hbonds_${PDB}_Repair_${n}_PN.txt
 cp AllAtoms_Partcov_${PDB}_Repair_${n}_PN.fxout AllAtoms_Partcov_${PDB}_Repair_${n}_PN.txt
-sed -i .bak -e 1,2d AllAtoms_Partcov_${PDB}_Repair_${n}_PN.txt
+sed -i '1,2d' AllAtoms_Partcov_${PDB}_Repair_${n}_PN.txt
 cp AllAtoms_VdWClashes_${PDB}_Repair_${n}_PN.fxout AllAtoms_VdWClashes_${PDB}_Repair_${n}_PN.txt
-sed -i .bak -e 1,2d AllAtoms_VdWClashes_${PDB}_Repair_${n}_PN.txt
+sed -i '1,2d' AllAtoms_VdWClashes_${PDB}_Repair_${n}_PN.txt
 cp AllAtoms_Volumetric_${PDB}_Repair_${n}_PN.fxout AllAtoms_Volumetric_${PDB}_Repair_${n}_PN.txt
-sed -i .bak -e 1,2d AllAtoms_Volumetric_${PDB}_Repair_${n}_PN.txt
+sed -i '1,2d' AllAtoms_Volumetric_${PDB}_Repair_${n}_PN.txt
 cp InteractingResidues_VdWClashes_${PDB}_Repair_${n}_PN.fxout InteractingResidues_VdWClashes_${PDB}_Repair_${n}_PN.txt
-sed -i .bak -e 1,5d InteractingResidues_VdWClashes_${PDB}_Repair_${n}_PN.txt
+sed -i '1,5d' InteractingResidues_VdWClashes_${PDB}_Repair_${n}_PN.txt
 cp InteractingResidues_Distances_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Distances_${PDB}_Repair_${n}_PN.txt
-sed -i .bak -e 1,5d InteractingResidues_Distances_${PDB}_Repair_${n}_PN.txt
+sed -i '1,5d' InteractingResidues_Distances_${PDB}_Repair_${n}_PN.txt
 cp InteractingResidues_Electro_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Electro_${PDB}_Repair_${n}_PN.txt
-sed -i .bak -e 1,5d InteractingResidues_Electro_${PDB}_Repair_${n}_PN.txt
+sed -i '1,5d' InteractingResidues_Electro_${PDB}_Repair_${n}_PN.txt
 cp InteractingResidues_Hbonds_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Hbonds_${PDB}_Repair_${n}_PN.txt
-sed -i .bak -e 1,5d InteractingResidues_Hbonds_${PDB}_Repair_${n}_PN.txt
+sed -i '1,5d' InteractingResidues_Hbonds_${PDB}_Repair_${n}_PN.txt
 cp InteractingResidues_Partcov_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Partcov_${PDB}_Repair_${n}_PN.txt
-sed -i .bak -e 1,5d InteractingResidues_Partcov_${PDB}_Repair_${n}_PN.txt
+sed -i '1,5d' InteractingResidues_Partcov_${PDB}_Repair_${n}_PN.txt
 cp InteractingResidues_Volumetric_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Volumetric_${PDB}_Repair_${n}_PN.txt
-sed -i .bak -e 1,5d InteractingResidues_Volumetric_${PDB}_Repair_${n}_PN.txt
+sed -i '1,5d' InteractingResidues_Volumetric_${PDB}_Repair_${n}_PN.txt
 cp InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.txt
-sed -i .bak -e 1,5d InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.txt
-
-
-
-
-
-
+sed -i '1,5d' InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.txt
--- a/foldx/test2/renamefiles.sh
+++ b/foldx/test2/renamefiles.sh
@ -1,14 +1,16 @@
 PDB=$1
-logger "Running renamefiles_mac"
-#cp Dif_${PDB}_Repair.fxout Dif_${PDB}_Repair.txt
-sed -i '.bak' -e 1,8d Dif_${PDB}_Repair.txt
+OUTDIR=$2
+cd ${OUTDIR}
+#cd /home/git/LSHTM_analysis/foldx/test2
+cp Dif_${PDB}_Repair.fxout Dif_${PDB}_Repair.txt
+sed -i '1,8d' Dif_${PDB}_Repair.txt
 cp Matrix_Hbonds_${PDB}_Repair_PN.fxout Matrix_Hbonds_${PDB}_Repair_PN.txt
 sed -n '5,190p' Matrix_Hbonds_${PDB}_Repair_PN.fxout > Matrix_Hbonds_RR_${PDB}_Repair_PN.txt
 sed -n '194,379p' Matrix_Hbonds_${PDB}_Repair_PN.fxout > Matrix_Hbonds_MM_${PDB}_Repair_PN.txt
 sed -n '383,568p' Matrix_Hbonds_${PDB}_Repair_PN.fxout > Matrix_Hbonds_SM_${PDB}_Repair_PN.txt
 sed -n '572,757p' Matrix_Hbonds_${PDB}_Repair_PN.fxout > Matrix_Hbonds_SS_${PDB}_Repair_PN.txt
 cp Matrix_Distances_${PDB}_Repair_PN.fxout Matrix_Distances_${PDB}_Repair_PN.txt
-sed -i '.bak' -e 1,4d Matrix_Distances_${PDB}_Repair_PN.txt
+sed -i '1,4d' Matrix_Distances_${PDB}_Repair_PN.txt
 cp Matrix_Volumetric_${PDB}_Repair_PN.fxout Matrix_Volumetric_${PDB}_Repair_PN.txt
 sed -n '5,190p' Matrix_Volumetric_${PDB}_Repair_PN.fxout > Matrix_Volumetric_RR_${PDB}_Repair_PN.txt
 sed -n '194,379p' Matrix_Volumetric_${PDB}_Repair_PN.fxout > Matrix_Volumetric_MM_${PDB}_Repair_PN.txt
@ -35,34 +37,28 @@ sed -n '194,379p' Matrix_VdWClashes_${PDB}_Repair_PN.fxout > Matrix_VdWClashes_M
 sed -n '383,568p' Matrix_VdWClashes_${PDB}_Repair_PN.fxout > Matrix_VdWClashes_SM_${PDB}_Repair_PN.txt
 sed -n '572,757p' Matrix_VdWClashes_${PDB}_Repair_PN.fxout > Matrix_VdWClashes_SS_${PDB}_Repair_PN.txt
 cp AllAtoms_Disulfide_${PDB}_Repair_PN.fxout AllAtoms_Disulfide_${PDB}_Repair_PN.txt
-sed -i '.bak' -e 1,2d AllAtoms_Disulfide_${PDB}_Repair_PN.txt
+sed -i '1,2d' AllAtoms_Disulfide_${PDB}_Repair_PN.txt
 cp AllAtoms_Electro_${PDB}_Repair_PN.fxout AllAtoms_Electro_${PDB}_Repair_PN.txt
-sed -i '.bak' -e 1,2d AllAtoms_Electro_${PDB}_Repair_PN.txt
+sed -i '1,2d' AllAtoms_Electro_${PDB}_Repair_PN.txt
 cp AllAtoms_Hbonds_${PDB}_Repair_PN.fxout AllAtoms_Hbonds_${PDB}_Repair_PN.txt
-sed -i '.bak' -e 1,2d AllAtoms_Hbonds_${PDB}_Repair_PN.txt
+sed -i '1,2d' AllAtoms_Hbonds_${PDB}_Repair_PN.txt
 cp AllAtoms_Partcov_${PDB}_Repair_PN.fxout AllAtoms_Partcov_${PDB}_Repair_PN.txt
-sed -i '.bak' -e 1,2d AllAtoms_Partcov_${PDB}_Repair_PN.txt
+sed -i '1,2d' AllAtoms_Partcov_${PDB}_Repair_PN.txt
 cp AllAtoms_VdWClashes_${PDB}_Repair_PN.fxout AllAtoms_VdWClashes_${PDB}_Repair_PN.txt
-sed -i '.bak' -e 1,2d AllAtoms_VdWClashes_${PDB}_Repair_PN.txt
+sed -i '1,2d' AllAtoms_VdWClashes_${PDB}_Repair_PN.txt
 cp AllAtoms_Volumetric_${PDB}_Repair_PN.fxout AllAtoms_Volumetric_${PDB}_Repair_PN.txt
-sed -i '.bak' -e 1,2d AllAtoms_Volumetric_${PDB}_Repair_PN.txt
+sed -i '1,2d' AllAtoms_Volumetric_${PDB}_Repair_PN.txt
 cp InteractingResidues_VdWClashes_${PDB}_Repair_PN.fxout InteractingResidues_VdWClashes_${PDB}_Repair_PN.txt
-sed -i '.bak' -e 1,5d InteractingResidues_VdWClashes_${PDB}_Repair_PN.txt
+sed -i '1,5d' InteractingResidues_VdWClashes_${PDB}_Repair_PN.txt
 cp InteractingResidues_Distances_${PDB}_Repair_PN.fxout InteractingResidues_Distances_${PDB}_Repair_PN.txt
-sed -i '.bak' -e 1,5d InteractingResidues_Distances_${PDB}_Repair_PN.txt
+sed -i '1,5d' InteractingResidues_Distances_${PDB}_Repair_PN.txt
 cp InteractingResidues_Electro_${PDB}_Repair_PN.fxout InteractingResidues_Electro_${PDB}_Repair_PN.txt
-sed -i '.bak' -e 1,5d InteractingResidues_Electro_${PDB}_Repair_PN.txt
+sed -i '1,5d' InteractingResidues_Electro_${PDB}_Repair_PN.txt
 cp InteractingResidues_Hbonds_${PDB}_Repair_PN.fxout InteractingResidues_Hbonds_${PDB}_Repair_PN.txt
-sed -i '.bak' -e 1,5d InteractingResidues_Hbonds_${PDB}_Repair_PN.txt
+sed -i '1,5d' InteractingResidues_Hbonds_${PDB}_Repair_PN.txt
 cp InteractingResidues_Partcov_${PDB}_Repair_PN.fxout InteractingResidues_Partcov_${PDB}_Repair_PN.txt
-sed -i '.bak' -e 1,5d InteractingResidues_Partcov_${PDB}_Repair_PN.txt
+sed -i '1,5d' InteractingResidues_Partcov_${PDB}_Repair_PN.txt
 cp InteractingResidues_Volumetric_${PDB}_Repair_PN.fxout InteractingResidues_Volumetric_${PDB}_Repair_PN.txt
-sed -i '.bak' -e 1,5d InteractingResidues_Volumetric_${PDB}_Repair_PN.txt
+sed -i '1,5d' InteractingResidues_Volumetric_${PDB}_Repair_PN.txt
 cp InteractingResidues_Disulfide_${PDB}_Repair_PN.fxout InteractingResidues_Disulfide_${PDB}_Repair_PN.txt
-sed -i '.bak' -e 1,5d InteractingResidues_Disulfide_${PDB}_Repair_PN.txt
-
-
-
-
-
-
+sed -i '1,5d' InteractingResidues_Disulfide_${PDB}_Repair_PN.txt
--- a/foldx/test2/runFoldx.py
+++ b/foldx/test2/runFoldx.py
@ -0,0 +1 @@
+../runFoldx.py
--- a/foldx/test2/runFoldx_test.py
+++ b/foldx/test2/runFoldx_test.py
@ -0,0 +1,250 @@
+#!/usr/bin/env python3
+import subprocess
+import os
+import numpy as np
+import pandas as pd
+from contextlib import suppress
+import re
+import csv
+
+def getInteractions(filename):
+    """Count the cells equal to 1 in a tab-separated matrix file.
+
+    The file is read with its first row as the header and its first column
+    as the index. The (row, column) positions found are printed before the
+    count is returned.
+    """
+    matrix = pd.read_csv(filename, sep="\t", header=0, index_col=0)
+    contacts = getIndexes(matrix, 1)
+    print(contacts)
+    return len(contacts)
+
+def formatMuts(mut_file, pdbname, chain='A', outfile=None):
+    """Convert a list of mutations into a FoldX 'individual_list' file.
+
+    Each mutation is taken from the first field of a row of ``mut_file``.
+    The chain identifier is inserted after the first character
+    (e.g. 'L4S' -> 'LA4S') and the result is written one per line,
+    terminated by ';'.
+
+    Parameters:
+        mut_file: path of the CSV file holding one mutation per row.
+        pdbname:  PDB identifier, used to build the default output file name.
+        chain:    chain identifier to insert (default 'A', as before).
+        outfile:  output path; defaults to the original hard-coded
+                  foldx/test2/individual_list_<pdbname>.txt location.
+
+    Returns:
+        The list of chain-annotated mutations, without the trailing ';'.
+    """
+    if outfile is None:
+        outfile = "/home/tanu/git/LSHTM_analysis/foldx/test2/individual_list_"+pdbname+".txt"
+
+    with open(mut_file) as csvfile:
+        # csv.reader yields [] for a blank line; skip those instead of
+        # failing with IndexError on row[0].
+        muts = [row[0] for row in csv.reader(csvfile) if row]
+
+    mut_list = []
+    with open(outfile, "w") as output:
+        for m in muts:
+            print(m)
+            mut = m[:1] + chain + m[1:]
+            mut_list.append(mut)
+            mut = mut + ";"
+            print(mut)
+            output.write(mut + "\n")
+    return mut_list
+
+def getIndexes(data, value):
+    """Locate every cell of ``data`` that equals ``value``.
+
+    Returns a list of (row_label, column_label) tuples, grouped by column
+    in column order and, within a column, in row order.
+    """
+    mask = data.isin([value])
+    mask.columns = data.columns.values
+    # Boolean Series flagging the columns that hold at least one match.
+    has_hit = mask.any()
+    positions = []
+    for col in list(has_hit[has_hit].index):
+        col_mask = mask[col]
+        positions.extend((row, col) for row in list(col_mask[col_mask].index))
+    return positions
+
+def loadFiles(df):
+    """Load a tab-separated text file of numbers into a float32 array.
+
+    Parameters:
+        df: path of the text file to read (a file name, not a DataFrame).
+
+    Returns:
+        np.ndarray of dtype float32 with one row per non-blank line.
+
+    Raises:
+        ValueError: if a field cannot be converted to a float.
+    """
+    resultList = []
+    # 'with' closes the file even when a conversion fails.
+    with open(df, 'r') as f:
+        for line in f:
+            line = line.rstrip('\n')
+            if not line:
+                # a blank (e.g. trailing) line carries no values
+                continue
+            # The original mapped over an undefined name (sVals) here.
+            resultList.append(list(map(np.float32, line.split("\t"))))
+    return np.asarray(resultList, dtype=np.float32)
+    
+#=======================================================================
+def main():
+    """Run the FoldX shell pipeline for PDB '3pl1' and tabulate the results.
+
+    Steps:
+      1. write the FoldX mutation list with formatMuts();
+      2. run runfoldx.sh, then runPrintNetworks.sh and mutrenamefiles.sh
+         once per mutation, then renamefiles.sh for the wild type;
+      3. read the 'total energy' column of Dif_<pdb>_Repair.txt as ddG;
+      4. for each interaction matrix, store the wild-type count minus the
+         mutant count returned by getInteractions();
+      5. write the transposed table (one row per mutation) to
+         foldx_results_<pdb>.csv in the current directory.
+    """
+    pdbname = '3pl1'
+    mut_filename = "pnca_muts_sample.csv"
+    # Directory the FoldX output files are read from.
+    workdir = "/home/tanu/git/LSHTM_analysis/foldx/test2"
+    mutlist = formatMuts(mut_filename, pdbname)
+
+    print(mutlist)
+    nmuts = len(mutlist)+1
+    print(nmuts)
+    print(mutlist)
+    print("start")
+
+    # NOTE(review): the runfoldx.sh shown in this commit reads the output
+    # directory from $2 and cd's into it; an extra argument is ignored by a
+    # script that does not read it -- confirm which version is on the path.
+    subprocess.check_output(['bash', 'runfoldx.sh', pdbname, workdir])
+    print("end")
+    for n in range(1, nmuts):
+        print(n)
+        # best effort: a failing mutation must not stop the remaining ones
+        with suppress(Exception):
+            subprocess.check_output(['bash', 'runPrintNetworks.sh', pdbname, str(n)])
+
+    for n in range(1, nmuts):
+        print(n)
+        with suppress(Exception):
+            # mutrenamefiles.sh cd's into the directory given as $3
+            subprocess.check_output(['bash', 'mutrenamefiles.sh', pdbname, str(n), workdir])
+
+    # renamefiles.sh cd's into the directory given as $2
+    subprocess.check_output(['bash', 'renamefiles.sh', pdbname, workdir])
+
+    dGdatafile = workdir + "/Dif_" + pdbname + "_Repair.txt"
+    dGdata = pd.read_csv(dGdatafile, sep="\t")
+    print(dGdata)
+    ddG = dGdata['total energy'].tolist()
+    print(ddG)
+
+    # (output row label, Matrix_* file stem) for every reported interaction.
+    # Only these seven were ever written to the results table; the MM/SM/SS
+    # wild-type counts the original also computed were never used.
+    kinds = [
+        ("contacts", "Distances"),
+        ("electro", "Electro_RR"),
+        ("disulfide", "Disulfide_RR"),
+        ("hbonds", "Hbonds_RR"),
+        ("partcov", "Partcov_RR"),
+        ("VdWClashes", "VdWClashes_RR"),
+        ("volumetric", "Volumetric_RR"),
+    ]
+
+    labels = ["ddG"]
+    rows = [ddG]
+    for label, stem in kinds:
+        prefix = workdir + "/Matrix_" + stem + "_" + pdbname + "_Repair"
+        wt_count = getInteractions(prefix + "_PN.txt")
+        diffs = []
+        for n in range(1, nmuts):
+            mut_count = getInteractions(prefix + "_" + str(n) + "_PN.txt")
+            diffs.append(wt_count - mut_count)
+        print(diffs)
+        labels.append(label)
+        rows.append(diffs)
+
+    # The former 'results.append(ddG)' call was dropped: its return value was
+    # discarded and DataFrame.append no longer exists in pandas 2.x.
+    results = pd.DataFrame(rows, columns=mutlist, index=labels)
+    print(results)
+    results2 = results.T # transpose df: one row per mutation
+    outputfilename = "foldx_results_"+pdbname+".csv"
+    results2.to_csv(outputfilename)
+
+
+if __name__ == "__main__":
+    main()
--- a/foldx/test2/runFoldx_test2.py
+++ b/foldx/test2/runFoldx_test2.py
@ -0,0 +1,456 @@
+#!/usr/bin/env python3
+import subprocess
+import os
+import sys
+import numpy as np
+import pandas as pd
+from contextlib import suppress
+from pathlib import Path
+import re
+import csv
+import argparse
+import shutil
+#https://realpython.com/python-pathlib/
+
+# FIXME
+#strong dependency of file and path names
+#cannot pass file with path. Need to pass them separately
+#assumptions made for dir struc as standard
+#datadir + drug + input
+
+#=======================================================================
+#%% specify input and curr dir
+homedir = os.path.expanduser('~')
+
+# The working directory is deliberately left unchanged; every directory used
+# further down is converted to an absolute path instead.
+#os.chdir(homedir + '/git/LSHTM_analysis/foldx/')
+
+#=======================================================================
+#%% command line args
+arg_parser = argparse.ArgumentParser()
+
+arg_parser.add_argument('-d', '--drug',     help = 'drug name (needed whenever a default directory has to be built)', default = None)
+# gene is used unconditionally to build the output file name, so a missing
+# value is reported by argparse instead of failing later on None.lower()
+arg_parser.add_argument('-g', '--gene',     help = 'gene name (case sensitive)', default = None, required = True)
+
+arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assumes <homedir>/git/Data')
+arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assumes <datadir>/<drug>/input')
+arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assumes <datadir>/<drug>/output')
+arg_parser.add_argument('-p', '--process_dir', help = 'Temp processing dir for running foldX. By default, it assumes <datadir>/<drug>/processing. Make sure it is somewhere with LOTS of storage as it writes all output!') #FIXME
+
+arg_parser.add_argument('-pdb', '--pdb_file', help = 'PDB File to process. By default, it assumes a file called <gene>_complex.pdb (gene in lower case) in input_dir')
+arg_parser.add_argument('-m', '--mutation_file', help = 'Mutation list. By default, assumes a file called <gene>_mcsm_formatted_snps.csv (gene in lower case) exists in output_dir')
+
+# FIXME: Doesn't work with 2 chains yet!
+arg_parser.add_argument('-c1', '--chain1',    help = 'Chain1 ID', default = 'A') # case sensitive
+arg_parser.add_argument('-c2', '--chain2',    help = 'Chain2 ID', default = 'B') # case sensitive
+
+args = arg_parser.parse_args()
+#=======================================================================
+#%% variable assignment: input and output
+#drug = 'pyrazinamide'
+#gene = 'pncA'
+#gene_match = gene + '_p.'
+#%%=====================================================================
+# Command line options
+drug         = args.drug
+gene         = args.gene
+
+datadir      = args.datadir
+indir        = args.input_dir
+outdir       = args.output_dir
+process_dir  = args.process_dir
+
+mut_filename = args.mutation_file
+chainA       = args.chain1
+chainB       = args.chain2
+pdb_filename = args.pdb_file
+
+# os.path.splitext will fail interestingly with file.pdb.txt.zip
+#pdb_name = os.path.splitext(pdb_file)[0]
+# Just the filename, thanks
+#pdb_name = Path(in_filename_pdb).stem
+
+#==============
+# directories
+#==============
+# Every default directory below is derived from the drug name, so the drug
+# may only be omitted when all three directories are given explicitly.
+if not drug and not (indir and outdir and process_dir):
+    arg_parser.error('-d/--drug is required unless --input_dir, --output_dir and --process_dir are all given')
+
+if not datadir:
+    datadir = homedir + '/' + 'git/Data'
+
+if not indir:
+    indir = datadir + '/' + drug + '/input'
+
+if not outdir:
+    outdir = datadir + '/' + drug + '/output'
+
+#TODO: perhaps better handled by refactoring code to prevent generating lots of output files!
+# Without this default process_dir stays None and os.path.abspath() below
+# raises TypeError, although the --process_dir help advertises a default.
+if not process_dir:
+    process_dir = datadir + '/' + drug + '/processing'
+
+# Make all paths absolute in case the user forgot
+indir = os.path.abspath(indir)
+process_dir = os.path.abspath(process_dir)
+outdir = os.path.abspath(outdir)
+datadir = os.path.abspath(datadir)
+
+#=======
+# input
+#=======
+# FIXME
+# Fall back to <gene>_complex.pdb (gene lower-cased) when no pdb file was
+# named on the command line; the stem is derived the same way either way.
+if not pdb_filename:
+    pdb_filename = gene.lower() + '_complex.pdb'
+pdb_name = Path(pdb_filename).stem
+
+infile_pdb = '/'.join([indir, pdb_filename])
+actual_pdb_filename = Path(infile_pdb).name
+#actual_pdb_filename = os.path.abspath(infile_pdb)
+
+# The default mutation list is looked up in the output dir; a user-supplied
+# one is only made absolute.
+if not mut_filename:
+    mutation_file = gene.lower() + '_mcsm_formatted_snps.csv'
+    infile_muts = '/'.join([outdir, mutation_file])
+    print('WARNING: Assuming default mutation file:', infile_muts)
+else:
+    mutation_file = os.path.abspath(mut_filename)
+    infile_muts = mutation_file
+    print('User-provided mutation file in use:', infile_muts)
+
+#=======
+# output 
+#=======
+# Results are written to <outdir>/<gene>_foldx.csv (gene lower-cased).
+out_filename = '%s_foldx.csv' % gene.lower()
+outfile_foldx = '/'.join((outdir, out_filename))
+
+# Echo the effective settings; print() joins the pieces with single spaces.
+_run_summary = (
+    'Arguments being passed:',
+    '\nDrug:', args.drug,
+    '\ngene:', args.gene,
+    '\ninput dir:', indir,
+    '\nprocess dir:', process_dir,
+    '\noutput dir:', outdir,
+    '\npdb file:', infile_pdb,
+    '\npdb name:', pdb_name,
+    '\nactual pdb name:', actual_pdb_filename,
+    '\nmutation file:', infile_muts,
+    '\nchain1:', args.chain1,
+    '\noutput file:', outfile_foldx,
+    '\n=============================================================',
+)
+print(*_run_summary)
+#=======================================================================
+
+def getInteractionEnergy(filename):
+    """Return the 'Interaction Energy' entry at row label 0 of a tab-separated file.
+
+    filename: path of a tab-separated table that has an 'Interaction Energy'
+    column; the file is read with pandas' default header handling.
+    """
+    summary = pd.read_csv(filename, sep = '\t')
+    energy_column = summary['Interaction Energy']
+    return energy_column.loc[0]
+
+def getInteractions(filename):
+    """Count the cells equal to 1 in a tab-separated matrix file.
+
+    The first line of the file is used as the header and the first column as
+    the row index; the count is the number of (row, column) positions that
+    getIndexes() reports for the value 1.
+    """
+    matrix = pd.read_csv(filename, index_col = 0, header = 0, sep = '\t')
+    return len(getIndexes(matrix, 1))
+
+def formatMuts(mut_file,pdbname):
+    """Write the mutation list file for this pdb and return the mutations.
+
+    mut_file: csv file whose first column holds one mutation per row
+              (e.g. L4S); blank lines are ignored.
+    pdbname:  used to name the output, <process_dir>/individual_list_<pdbname>.txt
+
+    The module-level chain id (chainA) is inserted after the first character
+    of every mutation (L4S -> LA4S for chain A). Each mutation is written on
+    its own line followed by ';'. The returned list holds the same mutations
+    without the trailing ';'.
+    """
+    with open(mut_file) as csvfile:
+        # csv.reader yields [] for a blank line, which would make row[0]
+        # raise IndexError, so such rows are skipped
+        muts = [row[0] for row in csv.reader(csvfile) if row]
+
+    mut_list = []
+    outfile = process_dir + '/individual_list_' + pdbname + '.txt'
+    with open(outfile, 'w') as output:
+        for m in muts:
+            print(m)
+            mut = m[:1] + chainA + m[1:]
+            mut_list.append(mut)
+            mut = mut + ';'
+            print(mut)
+            output.write(mut + '\n')
+    return mut_list
+
+def getIndexes(data, value):
+    """List the (row label, column label) positions where data equals value.
+
+    Columns are visited in their order in the frame and, within a column,
+    rows in index order; columns without any match contribute nothing.
+    """
+    hits = data.isin([value])
+    hits.columns = data.columns.values
+    col_has_hit = hits.any()
+    matching_cols = list(col_has_hit[col_has_hit.eq(True)].index)
+    return [
+        (row_label, col_label)
+        for col_label in matching_cols
+        for row_label in list(hits[col_label][hits[col_label].eq(True)].index)
+    ]
+
+def loadFiles(df):
+    """Load a tab-separated text file of numbers into a float32 numpy array.
+
+    df: path of the text file (despite the name, not a DataFrame). Every
+        non-empty line becomes one row; fields are split on tabs.
+
+    Returns a numpy array of dtype float32. A field that cannot be converted
+    to a number raises ValueError.
+    """
+    resultList = []
+    # 'with' closes the file even when a conversion below raises
+    with open(df, 'r') as f:
+        for line in f:
+            line = line.rstrip('\n')
+            if not line:
+                # ''.split('\t') gives [''], which cannot be converted
+                continue
+            sVals = line.split('\t')
+            resultList.append([np.float32(v) for v in sVals])
+    return np.asarray(resultList, dtype=np.float32)
+
+# TODO: use this code pattern rather than invoking bash
+#def repairPDB():
+#    subprocess.call(['foldx' 
+#    , '--command=RepairPDB'
+#    , '--pdb-dir=' + indir
+#    ,  '--pdb=' + actual_pdb_filename 
+#    , '--ionStrength=0.05'#
+#   , '--pH=7'
+#    , '--water=PREDICT'
+#    , '--vdwDesign=1'
+#    , 'outPDB=true'
+#    , '--output-dir=' + process_dir])
+
+#=======================================================================    
+def main():
+    """Run the FoldX pipeline for the configured pdb and write the results csv.
+
+    Steps, all driven by the module-level settings (indir, process_dir,
+    pdb_name, infile_muts, outfile_foldx, chainA, chainB):
+      1. write the mutation list with formatMuts()
+      2. run foldx RepairPDB, BuildModel, PrintNetworks and SequenceDetail
+      3. run foldx PrintNetworks once per mutant model
+      4. rename the foldx output via mutrenamefiles.sh / renamefiles.sh
+         (invoked through bash, relative to the current working directory)
+      5. optionally (comp == 'y') run AnalyseComplex for wild type and mutants
+      6. collect ddG and the wild type minus mutant interaction counts into
+         one table, one row per mutation, and write it to outfile_foldx
+
+    The return codes of the foldx calls are not checked.
+    """
+    pdbname = pdb_name
+    comp = '' # for complex only
+    mut_filename = infile_muts #pnca_mcsm_snps.csv
+    mutlist = formatMuts(mut_filename, pdbname)
+
+    print(mutlist)
+    nmuts = len(mutlist)
+    print(nmuts)
+    print(mutlist)
+    print('start')
+    #subprocess.check_output(['bash','repairPDB.sh', pdbname, process_dir])
+    print('\033[95mSTAGE: repair PDB\033[0m')
+    print('EXECUTING: repairPDB.sh %s %s %s' % (indir, actual_pdb_filename, process_dir))
+    #subprocess.check_output(['bash','repairPDB.sh', indir, actual_pdb_filename, process_dir])
+
+    # FIXME: put this hack elsewhere
+    # One argv entry per option: no shell is involved, so a single string
+    # holding all four options would reach foldx as one argument.
+    foldx_common = ['--ionStrength=0.05', '--pH=7', '--water=PREDICT', '--vdwDesign=1']
+
+    # Pieces shared by every call that works on files inside process_dir
+    pdb_dir_opt = '--pdb-dir=' + process_dir
+    out_dir_opt = '--output-dir=' + process_dir
+    repaired_pdb = pdbname + '_Repair.pdb'
+    network_opts = ['--water=PREDICT', '--vdwDesign=1', out_dir_opt]
+
+    # NOTE(review): 'outPDB=true' has no leading '--' (copied from the shell
+    # scripts) - confirm that foldx actually honours it in this form.
+    subprocess.call(['foldx', '--command=RepairPDB']
+                    + foldx_common
+                    + ['--pdb-dir=' + indir
+                    , '--pdb=' + actual_pdb_filename
+                    , 'outPDB=true'
+                    , out_dir_opt])
+    print('\033[95mCOMPLETE: repair PDB\033[0m')
+    print('\033[95mSTAGE: run FoldX (subprocess)\033[0m')
+    print('EXECUTING: runfoldx.sh %s %s ' % (pdbname, process_dir))
+    #output = subprocess.check_output(['bash', 'runfoldx.sh', pdbname, process_dir])
+
+    print('Running foldx BuildModel')
+    # The file name is passed without surrounding double quotes: with an
+    # argument list they would become part of the name.
+    subprocess.call(['foldx', '--command=BuildModel']
+                    + foldx_common
+                    + [pdb_dir_opt
+                    , '--pdb=' + repaired_pdb
+                    , '--mutant-file=individual_list_' + pdbname + '.txt'
+                    , 'outPDB=true'
+                    , '--numberOfRuns=1'
+                    , out_dir_opt], cwd=process_dir)
+
+    print('Running foldx PrintNetworks')
+    subprocess.call(['foldx', '--command=PrintNetworks', pdb_dir_opt
+                    , '--pdb=' + repaired_pdb] + network_opts, cwd=process_dir)
+
+    print('Running foldx SequenceDetail')
+    subprocess.call(['foldx', '--command=SequenceDetail', pdb_dir_opt
+                    , '--pdb=' + repaired_pdb] + network_opts, cwd=process_dir)
+
+    print('\033[95mCOMPLETE: run FoldX (subprocess)\033[0m')
+
+    print('\033[95mSTAGE: Print Networks (shell)\033[0m')
+    for n in range(1,nmuts+1):
+        print('\033[95mNETWORK:\033[0m', n)
+        print('Running foldx PrintNetworks for mutation', n)
+        subprocess.call(['foldx', '--command=PrintNetworks', pdb_dir_opt
+                        , '--pdb=' + pdbname + '_Repair_' + str(n) + '.pdb']
+                        + network_opts, cwd=process_dir)
+    print('\033[95mCOMPLETE: Print Networks (shell)\033[0m')
+
+    print('\033[95mSTAGE: Rename Mutation Files (shell)\033[0m')
+    for n in range(1,nmuts+1):
+        print('\033[95mMUTATION:\033[0m', n)
+        print('\033[96mCommand:\033[0m mutrenamefiles.sh %s %s %s' % (pdbname, str(n), process_dir ))
+        # FIXME: this is bad design and needs to be done in a pythonic way
+        # Deliberately best effort: a failing rename is ignored here.
+        with suppress(Exception):
+            subprocess.check_output(['bash', 'mutrenamefiles.sh', pdbname, str(n), process_dir])
+    print('\033[95mCOMPLETE: Rename Mutation Files (shell)\033[0m')
+
+    print('\033[95mSTAGE: Rename Files (shell)\033[0m')
+    # FIXME: this is bad design and needs to be done in a pythonic way
+    subprocess.check_output(['bash','renamefiles.sh', pdbname, process_dir])
+    print('\033[95mCOMPLETE: Rename Files (shell)\033[0m')
+
+    if comp=='y':
+        print('\033[95mSTAGE: Running foldx AnalyseComplex (subprocess)\033[0m')
+        chain1=chainA
+        chain2=chainB
+        chains_opt = '--analyseComplexChains=' + chain1 + ',' + chain2
+        subprocess.call(['foldx', '--command=AnalyseComplex', pdb_dir_opt
+                        , '--pdb=' + repaired_pdb
+                        , chains_opt] + network_opts, cwd=process_dir)
+
+        # FIXME why would we ever need to do this?!? Cargo-culted from runcomplex.sh
+        ac_source = process_dir + '/Summary_' + pdbname + '_Repair_AC.fxout'
+        ac_dest = process_dir + '/Summary_' + pdbname + '_Repair_AC.txt'
+        shutil.copyfile(ac_source, ac_dest)
+
+        for n in range(1,nmuts+1):
+            print('\033[95mSTAGE: Running foldx AnalyseComplex (subprocess) for mutation:\033[0m', n)
+            subprocess.call(['foldx', '--command=AnalyseComplex', pdb_dir_opt
+                            , '--pdb=' + pdbname + '_Repair_' + str(n) + '.pdb'
+                            , chains_opt] + network_opts, cwd=process_dir)
+
+            # FIXME why would we ever need to do this?!? Cargo-culted from runcomplex.sh
+            # The destination must be ..._Repair_<n>_AC.txt: that is the name
+            # read back when the interaction energies are collected below.
+            ac_mut_source = process_dir + '/Summary_' + pdbname + '_Repair_' + str(n) +'_AC.fxout'
+            ac_mut_dest = process_dir + '/Summary_' + pdbname + '_Repair_' + str(n) +'_AC.txt'
+            shutil.copyfile(ac_mut_source, ac_mut_dest)
+        # nmuts equals the last loop value and is also defined for an empty list
+        print('\033[95mCOMPLETE: foldx AnalyseComplex (subprocess) for mutation:\033[0m', nmuts)
+
+    interactions = ['Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS',
+                    'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM',
+                    'VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS']
+
+    dGdatafile = process_dir + '/Dif_' + pdbname + '_Repair.txt'
+    dGdata = pd.read_csv(dGdatafile, sep = '\t')
+
+    print('ddG')
+    print(len(dGdata))
+    ddG = list(dGdata['total energy'])
+
+    # interaction counts of the wild type, one per interaction type
+    wt_int = []
+    for i in interactions:
+        filename = process_dir + '/Matrix_' + i + '_'+ pdbname + '_Repair_PN.txt'
+        wt_int.append(getInteractions(filename))
+    print('wt')
+    print(wt_int)
+
+    # row 0 holds ddG, rows 1.. hold one interaction type each;
+    # one column per mutation
+    ntotal = len(interactions) + 1
+    print(ntotal)
+    print(nmuts)
+    data = np.empty((ntotal,nmuts))
+    data[0] = ddG
+    print(data)
+    for i, interaction in enumerate(interactions):
+        d=[]
+        for n in range(1, nmuts+1):
+            print(i)
+            filename = process_dir + '/Matrix_' + interaction + '_' + pdbname + '_Repair_' + str(n) + '_PN.txt'
+            mut = getInteractions(filename)
+            diff = wt_int[i] - mut
+            print(diff)
+            print(wt_int[i])
+            print(mut)
+            d.append(diff)
+        print(d)
+        data[i+1] = d
+
+    # labels of the rows of 'data', in the order they were filled
+    row_labels = ['ddG'] + interactions
+    print(row_labels)
+
+    IE = []
+    if comp=='y':
+        wtfilename = process_dir + '/Summary_' + pdbname + '_Repair_AC.txt'
+        wtE = getInteractionEnergy(wtfilename)
+        print(wtE)
+        for n in range(1,nmuts+1):
+            print(n)
+            filename = process_dir + '/Summary_' + pdbname + '_Repair_' + str(n) + '_AC.txt'
+            mutE = getInteractionEnergy(filename)
+            print(mutE)
+            diff = wtE - mutE
+            print(diff)
+            IE.append(diff)
+        print(IE)
+        IEresults = pd.DataFrame(IE,columns = ['Interaction Energy'], index = mutlist)
+        IEfilename = 'foldx_complexresults_'+pdbname+'.csv'
+        IEresults.to_csv(IEfilename)
+        print(len(IE))
+        data = np.append(data,[IE], axis = 0)
+        print(data)
+        row_labels.append('Interaction Energy')
+
+    # Column labels are read back from the list file written by formatMuts(),
+    # i.e. including the chain id and the trailing ';'.
+    mut_file = process_dir + '/individual_list_' + pdbname + '.txt'
+    with open(mut_file) as csvfile:
+        mutlist = [row[0] for row in csv.reader(csvfile) if row]
+    print(mutlist)
+    print(len(mutlist))
+    print(data)
+    results = pd.DataFrame(data, columns = mutlist, index = row_labels)
+    #print(results.head())
+
+    # my style formatted results
+    results2 = results.T # transpose df
+    results2.index.name = 'mutationinformation' # assign name to index
+    results2 = results2.reset_index() # turn it into a columns
+
+    results2['mutationinformation'] = results2['mutationinformation'].replace({r'([A-Z]{1})[A-Z]{1}([0-9]+[A-Z]{1});' : r'\1 \2'}, regex = True) # capture mcsm style muts (i.e not the chain id)
+    results2['mutationinformation'] = results2['mutationinformation'].str.replace(' ', '') # remove empty space
+
+    results2.rename(columns = {'Distances': 'Contacts'}, inplace = True)
+
+    # lower case columns
+    results2.columns = results2.columns.str.lower()
+
+    print('Writing file in the format below:\n'
+        , results2.head()
+        , '\nNo. of rows:', len(results2)
+        , '\nNo. of cols:', len(results2.columns))
+
+    outputfilename = outfile_foldx
+    #outputfilename = 'foldx_results_' + pdbname + '.csv'
+    #results.to_csv(outputfilename)
+    results2.to_csv(outputfilename, index = False)
+
+if __name__ == '__main__':
+    main()
--- a/scripts/data_extraction.py
+++ b/scripts/data_extraction.py
@ -26,7 +26,7 @@ Created on Tue Aug  6 12:56:03 2019
 # 1) <gene>_gwas.csv
 # 2) <gene>_common_ids.csv
 # 3) <gene>_ambiguous_muts.csv
-# 4) <gene>_mcsm_snps.csv
+# 4) <gene>_mcsm_formatted_snps.csv
 # 5) <gene>_metadata_poscounts.csv
 # 6) <gene>_metadata.csv
 # 7) <gene>_all_muts_msa.csv
@ -1193,7 +1193,7 @@ if snps_only.mutationinformation.isna().sum() == 0:
 else:
    sys.exit('FAIL: SNP has NA, Possible mapping issues from dict?')

-out_filename_mcsmsnps = gene.lower() + '_mcsm_style_snps.csv'
+out_filename_mcsmsnps = gene.lower() + '_mcsm_formatted_snps.csv'
 outfile_mcsmsnps = outdir + '/' + out_filename_mcsmsnps

 print('\n----------------------------------'