separated defs and calls and added a separate script to test examples

2021-02-12 14:15:55 +00:00 · 2021-02-12 14:15:55 +00:00 · deb0aa8e58
commit deb0aa8e58
parent 6c458f8883
13 changed files with 281 additions and 517 deletions
--- a/dynamut/dynamut.py
+++ b/dynamut/dynamut.py
@ -1,46 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Aug 19 14:33:51 2020
-
-@author: tanu
-"""
-
-
-#%% load packages
-import os,sys
-import subprocess
-import argparse
-import requests
-import re
-import time
-from bs4 import BeautifulSoup
-import pandas as pd
-from pandas.api.types import is_string_dtype
-from pandas.api.types import is_numeric_dtype
-#%%============================================================================
-
-#1) define muts batch
-#take mcsm file
-#split into 'n' batches
-#write output file with suffix of batch number
-
-
-#********** done this par ****************
-#2) get results for a batch url
-# read  file
-# store batch url
-#extract  number
-#build single url
-#build single results urls
-#get results and store them in df
-#update df 
-#dim of df = no. of muts in batch
-
-#3) format results
-# store unit measurements separtely
-# omit unit measurements from cols
-# create extra columns '_outcome' suffix by splitting numerical output
-# create separate col for  mcsm as it doesn't have output text
-      
-#%%============================================================================
--- a/dynamut/dynamut_results/dynamut_output_multiple.csv
+++ b/dynamut/dynamut_results/dynamut_output_multiple.csv
@ -0,0 +1,7 @@
+mutationinformation,ddg_dynamut,ddg_encom,ddg_mcsm,ddg_sdm,ddg_duet,dds_encom
+G13V,0.006 kcal/mol (Stabilizing),-0.053 kcal/mol (Destabilizing),-0.261 kcal/mol (Destabilizing),-0.120 kcal/mol (Destabilizing),0.120 kcal/mol (Stabilizing),0.066 kcal.mol-1.K-1 (Increase of molecule flexibility)
+A19T,-0.077 kcal/mol (Destabilizing),0.224 kcal/mol (Destabilizing),-0.631 kcal/mol (Destabilizing),-2.620 kcal/mol (Destabilizing),-0.758 kcal/mol (Destabilizing),-0.280 kcal.mol-1.K-1 (Decrease of molecule flexibility)
+I4N,-0.239 kcal/mol (Destabilizing),-0.720 kcal/mol (Destabilizing),-0.728 kcal/mol (Destabilizing),-0.550 kcal/mol (Destabilizing),-0.461 kcal/mol (Destabilizing),0.900 kcal.mol-1.K-1 (Increase of molecule flexibility)
+P3S,0.727 kcal/mol (Stabilizing),0.334 kcal/mol (Destabilizing),-0.672 kcal/mol (Destabilizing),0.010 kcal/mol (Stabilizing),-0.252 kcal/mol (Destabilizing),-0.418 kcal.mol-1.K-1 (Decrease of molecule flexibility)
+F12S,-0.270 kcal/mol (Destabilizing),0.048 kcal/mol (Destabilizing),-1.028 kcal/mol (Destabilizing),-0.930 kcal/mol (Destabilizing),-0.993 kcal/mol (Destabilizing),-0.060 kcal.mol-1.K-1 (Decrease of molecule flexibility)
+A19V,2.389 kcal/mol (Stabilizing),0.450 kcal/mol (Destabilizing),0.659 kcal/mol (Stabilizing),-0.170 kcal/mol (Destabilizing),1.040 kcal/mol (Stabilizing),-0.562 kcal.mol-1.K-1 (Decrease of molecule flexibility)
--- a/dynamut/dynamut_results/dynamut_output_single.csv
+++ b/dynamut/dynamut_results/dynamut_output_single.csv
@ -0,0 +1,3 @@
+mutationinformation,ddg_dynamut,ddg_encom,ddg_mcsm,ddg_sdm,ddg_duet,dds_encom
+G13V,0.006 kcal/mol (Stabilizing),-0.053 kcal/mol (Destabilizing),-0.261 kcal/mol (Destabilizing),-0.120 kcal/mol (Destabilizing),0.120 kcal/mol (Stabilizing),0.066 kcal.mol-1.K-1 (Increase of molecule flexibility)
+A19T,-0.077 kcal/mol (Destabilizing),0.224 kcal/mol (Destabilizing),-0.631 kcal/mol (Destabilizing),-2.620 kcal/mol (Destabilizing),-0.758 kcal/mol (Destabilizing),-0.280 kcal.mol-1.K-1 (Decrease of molecule flexibility)
--- a/dynamut/example_input/snp_test1.csv
+++ b/dynamut/example_input/snp_test1.csv
@ -0,0 +1,2 @@
+F12S
+A19V
--- a/dynamut/example_input/snp_test2.csv
+++ b/dynamut/example_input/snp_test2.csv
@ -0,0 +1,2 @@
+G13V
+A19T
--- a/dynamut/examples.py
+++ b/dynamut/examples.py
@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Feb 12 12:15:26 2021
+
+@author: tanu
+"""
+import os
+homedir = os.path.expanduser('~')
+os.chdir (homedir + '/git/LSHTM_analysis/dynamut')
+from get_results import *
+from submit import *
+#%%#####################################################################
+#EXAMPLE RUN for different stages
+#=====================
+# STAGE: submit.py
+#=====================
+my_host = 'http://biosig.unimelb.edu.au'
+my_prediction_url = f"{my_host}/dynamut/prediction_list"
+print(my_prediction_url)
+# submit_dynamut() writes the result url file to <my_outdir>/dynamut_temp
+my_outdir = homedir + '/git/LSHTM_analysis/dynamut'
+my_chain = 'A'
+my_email = 'tanushree.tunstall@lshtm.ac.uk'
+# structure and mutation list (1 mutation per line) to submit
+my_pdb_file = homedir + '/git/Data/streptomycin/input/gid_complex.pdb'
+my_mutation_list = homedir + '/git/LSHTM_analysis/dynamut/example_input/snp_test1.csv'
+my_suffix = 'gid_test1'
+
+#----------------------------------------------
+# example 1: 2 snps in a file
+#----------------------------------------------
+submit_dynamut(host_url = my_host
+               , pdb_file = my_pdb_file
+               , mutation_list = my_mutation_list
+               , chain = my_chain
+               , email_address = my_email
+               , prediction_url = my_prediction_url
+               , output_dir = my_outdir
+               , outfile_suffix = my_suffix)
+#%%###################################################################
+#=====================
+# STAGE: get_results.py
+#=====================
+my_host = 'http://biosig.unimelb.edu.au'
+my_outdir = homedir + '/git/LSHTM_analysis/dynamut'
+# get_results() writes <my_outdir>/dynamut_results/dynamut_output_<my_suffix>.csv
+#----------------------------------------------
+# example 1: multiple urls in a single file
+#----------------------------------------------
+my_url_file_multiple = homedir + '/git/LSHTM_analysis/dynamut/dynamut_temp/dynamut_result_url_batch_multiple.txt'
+print(my_url_file_multiple)
+my_suffix = 'multiple'
+
+get_results(url_file = my_url_file_multiple
+            , host_url = my_host
+            , output_dir = my_outdir
+            , outfile_suffix = my_suffix)
+
+#----------------------------------------------
+# example 2: single url in a file
+#----------------------------------------------
+my_url_file_single = homedir + '/git/LSHTM_analysis/dynamut/dynamut_temp/dynamut_result_url_batch_single.txt'
+print(my_url_file_single)
+my_suffix = 'single'
+
+get_results(url_file = my_url_file_single
+            , host_url = my_host
+            , output_dir = my_outdir
+            , outfile_suffix = my_suffix)
+#%%###################################################################
--- a/dynamut/get_results.py
+++ b/dynamut/get_results.py
@ -16,140 +16,68 @@ from bs4 import BeautifulSoup
 import pandas as pd
 from pandas.api.types import is_string_dtype
 from pandas.api.types import is_numeric_dtype
-#%%============================================================================
-host = 'http://biosig.unimelb.edu.au'
-pred_dynamut_batch = '/dynamut/results_prediction/161287964015'
-batch_result_url = host + pred_dynamut_batch
-batch_result_url 
-
-# build a single url with a given mutation
-result_id = re.search( r"([0-9]+)$", pred_dynamut_batch).group(0)
-mut = 'S2C'
-single_url = host + '/single_results/' + str(result_id)
-single_result_url = host + '/single_results/' + str(result_id) + '/' + mut
-print(single_result_url)
-
-#%%============================================================================
-param_dict = {}
-
-result_response = requests.get(single_result_url)
-if result_response.status_code == 200:
-        print('Fetching results')
-        # extract results using the html parser          
-        soup = BeautifulSoup(result_response.text, features = 'html.parser')
-        #web_result_raw = soup.find(id = 'predictions').get_text()
-        ddg_dynamut = soup.find(id = 'ddg_dynamut').get_text()
-        ddg_encom = soup.find(id = 'ddg_encom').get_text()
-        ddg_mcsm = soup.find(id = 'ddg_mcsm').get_text()
-        ddg_sdm = soup.find(id = 'ddg_sdm').get_text()
-        ddg_duet = soup.find(id = 'ddg_duet').get_text()
-        dds_encom = soup.find(id = 'dds_encom').get_text()
-        
-        param_dict = {"mutationinformation" : mut
-            , "ddg_dynamut" : ddg_dynamut
-            , "ddg_encom"   : ddg_encom
-            , "ddg_mcsm"    : ddg_mcsm
-            , "ddg_sdm"     : ddg_sdm
-            , "ddg_duet"    : ddg_duet
-            , "dds_encom"   : dds_encom
-            
-            }
-        results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T  
-
-print(results_df)
-
-#%% looping over mutation
-single_url = host + '/single_results/' + str(result_id)    
-muts = ["S2C", "S2F"]
-
-# initilialise empty df
-dynamut_results_df = pd.DataFrame()
-
-for i, mut in enumerate(muts):
-    #param_dict = {}
-    print('Running mutation', i+1, ':', mut)
-    snp = mut
-    single_result_url = single_url + '/' + snp
-    print('Getting results from:',  single_result_url)
-    
-    result_response = requests.get(single_result_url)
-    if result_response.status_code == 200:
-            print('Fetching results')
-            # extract results using the html parser          
-            soup = BeautifulSoup(result_response.text, features = 'html.parser')
-            #web_result_raw = soup.find(id = 'predictions').get_text()
-            ddg_dynamut = soup.find(id = 'ddg_dynamut').get_text()
-            ddg_encom = soup.find(id = 'ddg_encom').get_text()
-            ddg_mcsm = soup.find(id = 'ddg_mcsm').get_text()
-            ddg_sdm = soup.find(id = 'ddg_sdm').get_text()
-            ddg_duet = soup.find(id = 'ddg_duet').get_text()
-            dds_encom = soup.find(id = 'dds_encom').get_text()
-            
-            param_dict = {"mutationinformation" : snp
-                , "ddg_dynamut" : ddg_dynamut
-                , "ddg_encom"   : ddg_encom
-                , "ddg_mcsm"    : ddg_mcsm
-                , "ddg_sdm"     : ddg_sdm
-                , "ddg_duet"    : ddg_duet
-                , "dds_encom"   : dds_encom 
-                }
-            results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T
-            print(results_df)
-            dynamut_results_df = dynamut_results_df.append(results_df)
-            print(dynamut_results_df)
-            
-#%% Derive the single url from the batch result itself
-# get request from a batch url
-# corresponding to href
-batch_result_url 
-batch_response = requests.get(batch_result_url)
-batch_soup = BeautifulSoup(batch_response.text, features = 'html.parser')
-print(batch_soup)
-#table = batch_soup.find('table', attrs = {'class':'table table-striped table-bordered table-responsive'})
-#btn = batch_soup.find_all(href = True, attrs = {'class':'btn btn-default btn-sm'})
-#print(btn)
-
-
-# initilialise empty df
-dynamut_results_df = pd.DataFrame()
-for a in batch_soup.find_all('a', href=True, attrs = {'class':'btn btn-default btn-sm'}):
-    print ("Found the URL:", a['href']) 
-    single_result_url = host + a['href']
-    snp = re.search(r'([A-Z]+[0-9]+[A-Z]+$)', single_result_url).group(0)
-    print(snp)
-    print('\nGetting results from:',  single_result_url)
-    
-    result_response = requests.get(single_result_url)
-    if result_response.status_code == 200:
-            print('\nFetching results for SNP:', snp)
-            # extract results using the html parser          
-            soup = BeautifulSoup(result_response.text, features = 'html.parser')
-            #web_result_raw = soup.find(id = 'predictions').get_text()
-            ddg_dynamut = soup.find(id = 'ddg_dynamut').get_text()
-            ddg_encom = soup.find(id = 'ddg_encom').get_text()
-            ddg_mcsm = soup.find(id = 'ddg_mcsm').get_text()
-            ddg_sdm = soup.find(id = 'ddg_sdm').get_text()
-            ddg_duet = soup.find(id = 'ddg_duet').get_text()
-            dds_encom = soup.find(id = 'dds_encom').get_text()
-            
-            param_dict = {"mutationinformation" : snp
-                , "ddg_dynamut" : ddg_dynamut
-                , "ddg_encom"   : ddg_encom
-                , "ddg_mcsm"    : ddg_mcsm
-                , "ddg_sdm"     : ddg_sdm
-                , "ddg_duet"    : ddg_duet
-                , "dds_encom"   : dds_encom 
-                }
-            results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T
-            print(results_df)
-            dynamut_results_df = dynamut_results_df.append(results_df)
-            print(dynamut_results_df)
-            print('\nWriting dynamut results df')
-            dynamut_results_df.to_csv('test_dynamut.csv', index = False)
-            print('\nResults File:'
-                  , '\nNo. of rows:', dynamut_results_df.shape[0]
-                  , '\nNo. of cols:', dynamut_results_df.shape[1])
+#%%#####################################################################
+def get_results(url_file, host_url, output_dir, outfile_suffix):
+    """
+    Fetches dynamut predictions for all mutations linked from batch result urls.
+    @param url_file: path to file containing batch result urls (1 per line)
+    @type string
+    @param host_url: host url, prepended to the href of each single result page
+    @type string
+    @param output_dir: dir within which the 'dynamut_results' dir is created
+    @type string
+    @param outfile_suffix: suffix for the output csv filename
+    @type string
+    @return writes <output_dir>/dynamut_results/dynamut_output_<outfile_suffix>.csv
+    """
+    # ids of the html elements holding the predictions (also the csv colnames)
+    result_ids = ['ddg_dynamut', 'ddg_encom', 'ddg_mcsm', 'ddg_sdm', 'ddg_duet', 'dds_encom']
+    # one-row df per mutation; DataFrame.append() was removed in pandas 2.0
+    result_dfs = []
+    with open(url_file, 'r') as f:
+        for count, line in enumerate(f):
+            line = line.strip()
+            if not line: # blank line: requests.get('') would raise
+                continue
+            print('URL no.', count+1, '\n', line)
+            batch_response = requests.get(line)
+            batch_soup = BeautifulSoup(batch_response.text, features = 'html.parser')
                                    
+            # links with the button class point to the single mutation result pages
+            for a in batch_soup.find_all('a', href=True, attrs = {'class':'btn btn-default btn-sm'}):
+                print ("Found the URL:", a['href'])
+                single_result_url = host_url + a['href']
+                # mutation is the trailing part of the url, format: {WT}<POS>{Mut}
+                snp = re.search(r'([A-Z]+[0-9]+[A-Z]+$)', single_result_url).group(0)
+                print(snp)
+                print('\nGetting results from:',  single_result_url)
                 
+                result_response = requests.get(single_result_url)
+                if result_response.status_code == 200:
+                        print('\nFetching results for SNP:', snp)
+                        soup = BeautifulSoup(result_response.text, features = 'html.parser')
+                        param_dict = {"mutationinformation" : snp}
+                        for result_id in result_ids:
+                            param_dict[result_id] = soup.find(id = result_id).get_text()
                         
+                        results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T
+                        print('Result DF:', results_df, 'for URL:', line)
+                        result_dfs.append(results_df)
+    # pd.concat() raises ValueError on an empty list, hence the fallback
+    dynamut_results_out_df = pd.concat(result_dfs) if result_dfs else pd.DataFrame()
+    #============================
+    # Writing results file: csv
+    #============================
+    dynamut_results_dir = output_dir + '/dynamut_results'
+    if not os.path.exists(dynamut_results_dir):
+        print('\nCreating dir: dynamut_results within:', output_dir )
+        os.makedirs(dynamut_results_dir)
+    print('\nWriting dynamut results df')
+    print('\nResults File:'
+          , '\nNo. of rows:', dynamut_results_out_df.shape[0]
+          , '\nNo. of cols:', dynamut_results_out_df.shape[1])
+    print(dynamut_results_out_df)
     
+    out_filename = dynamut_results_dir + '/dynamut_output_' + outfile_suffix + '.csv'
+    dynamut_results_out_df.to_csv(out_filename, index = False)
+#%%#####################################################################    
--- a/dynamut/get_results_def.py
+++ b/dynamut/get_results_def.py
@ -1,108 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Aug 19 14:33:51 2020
-
-@author: tanu
-"""
-#%% load packages
-import os,sys
-import subprocess
-import argparse
-import requests
-import re
-import time
-from bs4 import BeautifulSoup
-import pandas as pd
-from pandas.api.types import is_string_dtype
-from pandas.api.types import is_numeric_dtype
-#%%
-def get_results(url_file, host_url, output_dir, outfile_suffix):
-    # initilialise empty df
-    dynamut_results_out_df = pd.DataFrame()
-    with open(url_file, 'r') as f:
-        for count, line in enumerate(f):
-            line = line.strip()
-            print('URL no.', count+1, '\n', line)
-            #batch_response = requests.get(line, headers=headers)
-            batch_response = requests.get(line)
-            batch_soup = BeautifulSoup(batch_response.text, features = 'html.parser')
-                                   
-            # initilialise empty df
-            #dynamut_results_df = pd.DataFrame()
-            for a in batch_soup.find_all('a', href=True, attrs = {'class':'btn btn-default btn-sm'}):
-                print ("Found the URL:", a['href']) 
-                single_result_url = host_url + a['href']
-                snp = re.search(r'([A-Z]+[0-9]+[A-Z]+$)', single_result_url).group(0)
-                print(snp)
-                print('\nGetting results from:',  single_result_url)
-                
-                result_response = requests.get(single_result_url)
-                if result_response.status_code == 200:
-                        print('\nFetching results for SNP:', snp)
-                        # extract results using the html parser          
-                        soup = BeautifulSoup(result_response.text, features = 'html.parser')
-                        #web_result_raw = soup.find(id = 'predictions').get_text()
-                        ddg_dynamut = soup.find(id = 'ddg_dynamut').get_text()
-                        ddg_encom = soup.find(id = 'ddg_encom').get_text()
-                        ddg_mcsm = soup.find(id = 'ddg_mcsm').get_text()
-                        ddg_sdm = soup.find(id = 'ddg_sdm').get_text()
-                        ddg_duet = soup.find(id = 'ddg_duet').get_text()
-                        dds_encom = soup.find(id = 'dds_encom').get_text()
-                        
-                        param_dict = {"mutationinformation" : snp
-                            , "ddg_dynamut" : ddg_dynamut
-                            , "ddg_encom"   : ddg_encom
-                            , "ddg_mcsm"    : ddg_mcsm
-                            , "ddg_sdm"     : ddg_sdm
-                            , "ddg_duet"    : ddg_duet
-                            , "dds_encom"   : dds_encom 
-                            }
-                        results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T
-                        print('Result DF:', results_df, 'for URL:', line)
-                        #dynamut_results_df = dynamut_results_df.append(results_df)#!1 too many!:-)
-                        dynamut_results_out_df = dynamut_results_out_df.append(results_df)
-                        #print(dynamut_results_out_df)
-    #============================
-    # Writing results file: csv
-    #============================                   
-    dynamut_results_dir = output_dir + '/dynamut_results'
-    if not os.path.exists(dynamut_results_dir):
-        print('\nCreating dir: dynamut_results within:', output_dir )
-        os.makedirs(dynamut_results_dir)   
-    print('\nWriting dynamut results df')
-    print('\nResults File:'
-          , '\nNo. of rows:', dynamut_results_out_df.shape[0]
-          , '\nNo. of cols:', dynamut_results_out_df.shape[1])
-    print(dynamut_results_out_df)
-    #dynamut_results_out_df.to_csv('/tmp/test_dynamut.csv', index = False)
-    
-    # build out filename
-    out_filename = dynamut_results_dir + '/dynamut_output_' + outfile_suffix + '.csv'
-    dynamut_results_out_df.to_csv(out_filename, index = False)
-#%% EXAMPLE RUN
-# globals
-#homedir = os.path.expanduser('~')
-#my_host = 'http://biosig.unimelb.edu.au'
-#my_outdir = homedir + '/git/LSHTM_analysis/dynamut'
-
-#=============================================
-# example 1: multiple urls in a single file
-#=============================================
-#my_url_file_multiple = homedir + '/git/LSHTM_analysis/dynamut/dynamut_temp/dynamut_result_url_batch_multiple.txt'
-#print(my_url_file_multiple)
-#get_results(url_file  = my_url_file_multiple
-#            , host_url = my_host
-#            , output_dir = my_outdir
-#            , outfile_suffix='multiple')
-
-#=============================================
-# example 2: single url in a file
-#=============================================
-#my_url_file_single = homedir + '/git/LSHTM_analysis/dynamut/dynamut_temp/dynamut_result_url_batch_single.txt'
-#print(my_url_file_multiple)
-#get_results(my_url_file_single
-#            , host_url = my_host
-#            , output_dir = my_outdir
-#            , outfile_suffix = 'single')
-#%%
--- a/dynamut/reading_muts.py
+++ b/dynamut/reading_muts.py
@ -1,36 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Aug 19 14:33:51 2020
-
-@author: tanu
-"""
-
-
-#%% load packages
-import os,sys
-import subprocess
-import argparse
-import requests
-import re
-import time
-from bs4 import BeautifulSoup
-import pandas as pd
-import numpy as np
-from pandas.api.types import is_string_dtype
-from pandas.api.types import is_numeric_dtype
-#%%============================================================================
-# read mutation file
-
-all_muts = pd.read_csv("/home/tanu/git/Data/streptomycin/output/snp_batches/snps_batch_00", header=None)
-#https://gist.github.com/jrivero/1085501
-n = 20
-chunks = [all_muts[i:i+n] for i in range(0,all_muts.shape[0],n)]
-#%%
-
-muts_list = all_muts[0].values.tolist()
-
-host = 'http://biosig.unimelb.edu.au/dynamut'
-mut_prediction = '/prediction'
-
-submit_url = host + mut_prediction
--- a/dynamut/run_results.py
+++ b/dynamut/run_results.py
@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Aug 19 14:33:51 2020
+
+@author: tanu
+"""
+#%% load packages (chdir presumably so that get_results.py is found when run interactively - verify)
+import os
+homedir = os.path.expanduser('~')
+os.chdir (homedir + '/git/LSHTM_analysis/dynamut')
+from get_results import *
+########################################################################
+# variables: my_host is prepended to the result links found by get_results()
+my_host = 'http://biosig.unimelb.edu.au'
+# Needed if things try to block the 'requests' user agent
+#headers = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"}
+
+# TODO: add cmd line args
+#gene = 'gid'
+drug = 'streptomycin'
+datadir = homedir + '/git/Data'
+indir = datadir + '/' + drug + '/input'
+outdir = datadir + '/' + drug + '/output'
+# file of batch result urls (1 per line) for suffix 'gid_b1'
+my_url_file =  outdir + '/dynamut_temp/dynamut_result_url_gid_b1.txt'
+my_suffix = 'gid_b1'
+#==========================
+# CALL: get_results()
+# Data: gid+streptomycin; output csv filename ends with my_suffix
+#==========================
+print(my_url_file, 'suffix:', my_suffix)
+
+get_results(url_file  = my_url_file
+           , host_url = my_host
+           , output_dir = outdir
+           , outfile_suffix = my_suffix)
+########################################################################
--- a/dynamut/run_submit.py
+++ b/dynamut/run_submit.py
@ -0,0 +1,58 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Aug 19 14:33:51 2020
+
+@author: tanu
+"""
+#%% load packages (chdir presumably so that submit.py is found when run interactively - verify)
+import os
+homedir = os.path.expanduser('~')
+os.chdir (homedir + '/git/LSHTM_analysis/dynamut')
+from submit import *
+########################################################################
+# variables: host and the url the batch of mutations is posted to
+my_host = 'http://biosig.unimelb.edu.au'
+my_prediction_url = f"{my_host}/dynamut/prediction_list"
+print(my_prediction_url)
+
+# TODO: add cmd line args
+#gene = 'gid'
+drug = 'streptomycin'
+datadir = homedir + '/git/Data'
+indir = datadir + '/' + drug + '/input'
+outdir = datadir + '/' + drug + '/output'
+# chain and email address passed on to submit_dynamut()
+my_chain = 'A'
+my_email = 'tanushree.tunstall@lshtm.ac.uk'
+
+my_pdb_file = homedir + '/git/Data/streptomycin/input/gid_complex.pdb'
+# batches: only one my_mutation_list/my_suffix pair is left uncommented per run
+# batch 1: 00.txt
+#my_mutation_list =outdir + '/snp_batches/50/snp_batch_00.txt'
+#my_suffix = 'gid_b1'
+#RAN: 11 Feb, ~14:00 pm * RETRIEVED
+
+# batch 2: 01.txt
+#my_mutation_list = outdir + '/snp_batches/50/snp_batch_01.txt'
+#my_suffix = 'gid_b2'
+#RAN: 12 Feb, ~10:00 am, AWAITING
+
+# batch 3: 02.txt
+my_mutation_list = outdir + '/snp_batches/50/snp_batch_02.txt'
+my_suffix = 'gid_b3'
+#RAN: 12 Feb, ~12:40 pm, AWAITING
+
+#==========================
+# CALL: submit_dynamut()
+# Data: gid+streptomycin; my_suffix is used in the name of the result url file
+#==========================
+submit_dynamut(host_url = my_host
+               , pdb_file = my_pdb_file
+               , mutation_list = my_mutation_list
+               , chain = my_chain
+               , email_address = my_email
+               , prediction_url = my_prediction_url
+               , output_dir = outdir
+               , outfile_suffix = my_suffix) 
+#%%#####################################################################               
--- a/dynamut/submit.py
+++ b/dynamut/submit.py
@ -16,68 +16,21 @@ from bs4 import BeautifulSoup
 import pandas as pd
 from pandas.api.types import is_string_dtype
 from pandas.api.types import is_numeric_dtype
-#%% homedir
-homedir = os.path.expanduser('~')
-print('My homedir is:', homedir)
-
-#%%
-host = 'http://biosig.unimelb.edu.au'
-prediction_url = f"{host}/dynamut/prediction_list"
-print(prediction_url)
-
-#%% example params
-gene_name = 'gid'
-drug = 'streptomycin'
-datadir = homedir + '/git/Data'
-indir = datadir + '/' + drug + '/input'
-#outdir = datadir + '/' + drug + '/output'
-outdir = homedir + '/git/LSHTM_analysis/dynamut' # for example
-
-dynamut_temp_dir = outdir + '/dynamut_temp'
-
-if not os.path.exists(dynamut_temp_dir):
-    print('Creating dynamut_temp in outdir', outdir )
-    os.makedirs(dynamut_temp_dir)
-    
-batch_no = 1
-out_url_file = dynamut_temp_dir + '/dynamut_result_url_batch_' + str(batch_no) + '.txt'
-
-   
-#%% request calculation (no def)
-with open("/home/tanu/git/Data/streptomycin/input/gid_complex.pdb", "rb") as pdb_file, open ("/home/tanu/git/LSHTM_analysis/dynamut/snp_test2.csv", "rb") as mutation_list:
-        files = {"wild": pdb_file
-                 , "mutation_list": mutation_list}
-        body = {"chain": 'A'
-                , "email": 'tanushree.tunstall@lshtm.ac.uk'}
-
-        response = requests.post(prediction_url, files = files, data = body)
-        print(response.status_code)
-        if response.history:
-            print('PASS: valid mutation submitted. Fetching result url')
-            url_match = re.search('/dynamut/results_prediction/.+(?=")', response.text)
-            url = host + url_match.group()
-            print(url)
-            
-            #===============
-            # writing file: result urls
-            #===============
-            out_url_file = dynamut_temp_dir + '/dynamut_result_url_batch_' + str(batch_no) + '.txt'
-            print('Writing output url file:', out_url_file)
-            myfile = open(out_url_file, 'a')    
-            myfile.write(url)
-            myfile.close()
-
-#%%
-def request_calculation(pdb_file, mutation_list
+#%%#####################################################################
+def submit_dynamut(host_url
+                        , pdb_file
+                        , mutation_list
                        , chain
-                        , my_email
+                        , email_address
                        , prediction_url
                        , output_dir
-                        , gene_name
-                        , batch_no
-                        , out_url_file):
+                        , outfile_suffix
+                        ):
    """
-    Makes a POST request for a ligand affinity prediction.
+    Makes a POST request for dynamut predictions.
+
+    @param host_url: valid host url for submitting the job
+    @type string

    @param pdb_file: valid path to pdb structure
    @type string
@ -91,31 +44,44 @@ def request_calculation(pdb_file, mutation_list
 	@param prediction_url: dynamut url for prediction
 	@type string
       
-    @return txt file containing batch no. of snps processed
+    @param output_dir: output dir
+	@type string
+    
+    @param outfile_suffix: outfile_suffix
+	@type string, default is batch no.
+    
+    @param outfile_suffix: to append to outfile
+	@type string
+    
+    @return writes a .txt file containing url for the snps processed with user provided suffix in filename
    @type string
    """
    
-    with open(pdb_file, "rb") as pdb_file, open (mutation_list) as mutation_list:
+    with open(pdb_file, "rb") as pdb_file, open (mutation_list, "rb") as mutation_list:
        files = {"wild": pdb_file
                 , "mutation_list": mutation_list}
-        body = {"chain": 'A'
-                , "email": 'tanushree.tunstall@lshtm.ac.uk'}
+        body = {"chain": chain
+                , "email": email_address}

        response = requests.post(prediction_url, files = files, data = body)
        print(response.status_code)
        if response.history:
-            print('PASS: valid mutation submitted. Fetching result url')
+            print('\nPASS: valid submission. Fetching result url')
            url_match = re.search('/dynamut/results_prediction/.+(?=")', response.text)
-            url = host + url_match.group()
-            print(url)
+            url = host_url + url_match.group()
+            print('\nURL for snp batch no ', str(outfile_suffix), ':', url)
            
            #===============
            # writing file: result urls
            #===============
-            out_url_file = dynamut_temp_dir + '/dynamut_result_url_batch_' + str(batch_no) + '.txt'
-            print('Writing output url file:', out_url_file)
+            dynamut_temp_dir = output_dir + '/dynamut_temp' # creates a temp dir within output_dir
+            if not os.path.exists(dynamut_temp_dir):
+                print('\nCreating dynamut_temp in output_dir', output_dir )
+                os.makedirs(dynamut_temp_dir)                    
+            
+            out_url_file = dynamut_temp_dir + '/dynamut_result_url_' + str(outfile_suffix) + '.txt'
+            print('\nWriting output url file:', out_url_file)
            myfile = open(out_url_file, 'a')    
            myfile.write(url)
            myfile.close()
-#====================
-# Submit first batch
+#%%#####################################################################
--- a/dynamut/submit_def.py
+++ b/dynamut/submit_def.py
@ -1,121 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Aug 19 14:33:51 2020
-
-@author: tanu
-"""
-#%% load packages
-import os,sys
-import subprocess
-import argparse
-import requests
-import re
-import time
-from bs4 import BeautifulSoup
-import pandas as pd
-from pandas.api.types import is_string_dtype
-from pandas.api.types import is_numeric_dtype
-#%% homedir
-homedir = os.path.expanduser('~')
-print('My homedir is:', homedir)
-#%%
-def request_calculation(pdb_file
-                        , mutation_list
-                        , batch_no
-                        , chain
-                        , my_email
-                        , prediction_url
-                        , output_dir
-                        #, gene_name
-                        #, out_url_file
-                        ):
-    """
-    Makes a POST request for dynamut predictions.
-
-    @param pdb_file: valid path to pdb structure
-    @type string
-    
-    @param mutation_list: list of mutations (1 per line) of the format: {WT}<POS>{Mut}
-	@type string
-	    
-    @param batch_no: batch no so it can be added as a suffix to the the outfile
-	@type int
-    
-    @param chain: single-letter(caps)
-	@type chr
-
-	@param prediction_url: dynamut url for prediction
-	@type string
-       
-    @param output_dir: output dir
-	@type string
-
-    @param gene_name: name of gene
-	@type string
-    
-    #@param out_url_file: name of output file with batch no. as suffix
-	@type string
-       
-    @return txt file containing batch no. of snps processed (i.e out_url_file)
-    @type string
-    """
-    
-    with open(pdb_file, "rb") as pdb_file, open (mutation_list, "rb") as mutation_list:
-        files = {"wild": pdb_file
-                 , "mutation_list": mutation_list}
-        body = {"chain": chain
-                , "email": my_email}
-
-        response = requests.post(prediction_url, files = files, data = body)
-        print(response.status_code)
-        if response.history:
-            print('\nPASS: valid submission. Fetching result url')
-            url_match = re.search('/dynamut/results_prediction/.+(?=")', response.text)
-            url = host + url_match.group()
-            print('\nURL for snp batch no ', str(batch_no), ':', url)
-            
-            #===============
-            # writing file: result urls
-            #===============
-            dynamut_temp_dir = outdir + '/dynamut_temp'
-            if not os.path.exists(dynamut_temp_dir):
-                print('\nCreating dynamut_temp in outdir', outdir )
-                os.makedirs(dynamut_temp_dir)                    
-            
-            out_url_file = dynamut_temp_dir + '/dynamut_result_url_batch_' + str(batch_no) + '.txt'
-            print('\nWriting output url file:', out_url_file)
-            myfile = open(out_url_file, 'a')    
-            myfile.write(url)
-            myfile.close()
-#%%globals!?
-host = 'http://biosig.unimelb.edu.au'
-#prediction_url = f"{host}/dynamut/prediction_list"
-#print(prediction_url)
-
-#gene = 'gid'
-drug = 'streptomycin'
-datadir = homedir + '/git/Data'
-indir = datadir + '/' + drug + '/input'
-outdir = datadir + '/' + drug + '/output'
-#outdir = homedir + '/git/LSHTM_analysis/dynamut' # for example
-
-my_chain = 'A'
-my_pdb_file = homedir + '/git/Data/streptomycin/input/gid_complex.pdb'
-
-# batch 1: 00.txt
-#my_mutation_list = homedir + '/git/Data/streptomycin/output/snp_batches/50/snp_batch_00.txt'
-#my_batch = 1
-
-# batch 2: 01.txt
-my_mutation_list = outdir + '/snp_batches/50/snp_batch_01.txt'
-my_batch = 2
-
-# %% call this function
-request_calculation (pdb_file = my_pdb_file
-, mutation_list = my_mutation_list
-, chain = my_chain
-, my_email = 'tanushree.tunstall@lshtm.ac.uk'
-, prediction_url = f"{host}/dynamut/prediction_list"
-, output_dir = outdir
-, batch_no = my_batch)