From deb0aa8e584f4f1469d7981121c060e6715e34f1 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Fri, 12 Feb 2021 14:15:55 +0000 Subject: [PATCH] separated defs and calls and added a separate script to test examples --- dynamut/dynamut.py | 46 ---- .../dynamut_output_multiple.csv | 7 + .../dynamut_results/dynamut_output_single.csv | 3 + dynamut/example_input/snp_test1.csv | 2 + dynamut/example_input/snp_test2.csv | 2 + dynamut/examples.py | 71 +++++++ dynamut/get_results.py | 200 ++++++------------ dynamut/get_results_def.py | 108 ---------- dynamut/reading_muts.py | 36 ---- dynamut/run_results.py | 38 ++++ dynamut/run_submit.py | 58 +++++ dynamut/submit.py | 106 ++++------ dynamut/submit_def.py | 121 ----------- 13 files changed, 281 insertions(+), 517 deletions(-) delete mode 100755 dynamut/dynamut.py create mode 100644 dynamut/dynamut_results/dynamut_output_multiple.csv create mode 100644 dynamut/dynamut_results/dynamut_output_single.csv create mode 100644 dynamut/example_input/snp_test1.csv create mode 100644 dynamut/example_input/snp_test2.csv create mode 100644 dynamut/examples.py mode change 100755 => 100644 dynamut/get_results.py delete mode 100644 dynamut/get_results_def.py delete mode 100755 dynamut/reading_muts.py create mode 100644 dynamut/run_results.py create mode 100644 dynamut/run_submit.py mode change 100755 => 100644 dynamut/submit.py delete mode 100644 dynamut/submit_def.py diff --git a/dynamut/dynamut.py b/dynamut/dynamut.py deleted file mode 100755 index fca749b..0000000 --- a/dynamut/dynamut.py +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Wed Aug 19 14:33:51 2020 - -@author: tanu -""" - - -#%% load packages -import os,sys -import subprocess -import argparse -import requests -import re -import time -from bs4 import BeautifulSoup -import pandas as pd -from pandas.api.types import is_string_dtype -from pandas.api.types import is_numeric_dtype -#%%============================================================================ - -#1) define muts batch -#take mcsm file -#split into 'n' batches -#write output file with suffix of batch number - - -#********** done this par **************** -#2) get results for a batch url -# read file -# store batch url -#extract number -#build single url -#build single results urls -#get results and store them in df -#update df -#dim of df = no. of muts in batch - -#3) format results -# store unit measurements separtely -# omit unit measurements from cols -# create extra columns '_outcome' suffix by splitting numerical output -# create separate col for mcsm as it doesn't have output text - -#%%============================================================================ diff --git a/dynamut/dynamut_results/dynamut_output_multiple.csv b/dynamut/dynamut_results/dynamut_output_multiple.csv new file mode 100644 index 0000000..fef0be4 --- /dev/null +++ b/dynamut/dynamut_results/dynamut_output_multiple.csv @@ -0,0 +1,7 @@ +mutationinformation,ddg_dynamut,ddg_encom,ddg_mcsm,ddg_sdm,ddg_duet,dds_encom +G13V,0.006 kcal/mol (Stabilizing),-0.053 kcal/mol (Destabilizing),-0.261 kcal/mol (Destabilizing),-0.120 kcal/mol (Destabilizing),0.120 kcal/mol (Stabilizing),0.066 kcal.mol-1.K-1 (Increase of molecule flexibility) +A19T,-0.077 kcal/mol (Destabilizing),0.224 kcal/mol (Destabilizing),-0.631 kcal/mol (Destabilizing),-2.620 kcal/mol (Destabilizing),-0.758 kcal/mol (Destabilizing),-0.280 kcal.mol-1.K-1 (Decrease of molecule flexibility) +I4N,-0.239 kcal/mol (Destabilizing),-0.720 kcal/mol (Destabilizing),-0.728 kcal/mol (Destabilizing),-0.550 kcal/mol (Destabilizing),-0.461 kcal/mol (Destabilizing),0.900 kcal.mol-1.K-1 (Increase of molecule flexibility) +P3S,0.727 kcal/mol (Stabilizing),0.334 kcal/mol (Destabilizing),-0.672 kcal/mol (Destabilizing),0.010 kcal/mol (Stabilizing),-0.252 kcal/mol (Destabilizing),-0.418 kcal.mol-1.K-1 (Decrease of molecule flexibility) +F12S,-0.270 kcal/mol (Destabilizing),0.048 kcal/mol (Destabilizing),-1.028 kcal/mol (Destabilizing),-0.930 kcal/mol (Destabilizing),-0.993 kcal/mol (Destabilizing),-0.060 kcal.mol-1.K-1 (Decrease of molecule flexibility) +A19V,2.389 kcal/mol (Stabilizing),0.450 kcal/mol (Destabilizing),0.659 kcal/mol (Stabilizing),-0.170 kcal/mol (Destabilizing),1.040 kcal/mol (Stabilizing),-0.562 kcal.mol-1.K-1 (Decrease of molecule flexibility) diff --git a/dynamut/dynamut_results/dynamut_output_single.csv b/dynamut/dynamut_results/dynamut_output_single.csv new file mode 100644 index 0000000..b117494 --- /dev/null +++ b/dynamut/dynamut_results/dynamut_output_single.csv @@ -0,0 +1,3 @@ +mutationinformation,ddg_dynamut,ddg_encom,ddg_mcsm,ddg_sdm,ddg_duet,dds_encom +G13V,0.006 kcal/mol (Stabilizing),-0.053 kcal/mol (Destabilizing),-0.261 kcal/mol (Destabilizing),-0.120 kcal/mol (Destabilizing),0.120 kcal/mol (Stabilizing),0.066 kcal.mol-1.K-1 (Increase of molecule flexibility) +A19T,-0.077 kcal/mol (Destabilizing),0.224 kcal/mol (Destabilizing),-0.631 kcal/mol (Destabilizing),-2.620 kcal/mol (Destabilizing),-0.758 kcal/mol (Destabilizing),-0.280 kcal.mol-1.K-1 (Decrease of molecule flexibility) diff --git a/dynamut/example_input/snp_test1.csv b/dynamut/example_input/snp_test1.csv new file mode 100644 index 0000000..9e179c2 --- /dev/null +++ b/dynamut/example_input/snp_test1.csv @@ -0,0 +1,2 @@ +F12S +A19V diff --git a/dynamut/example_input/snp_test2.csv b/dynamut/example_input/snp_test2.csv new file mode 100644 index 0000000..e72171c --- /dev/null +++ b/dynamut/example_input/snp_test2.csv @@ -0,0 +1,2 @@ +G13V +A19T diff --git a/dynamut/examples.py b/dynamut/examples.py new file mode 100644 index 0000000..b8d6c23 --- /dev/null +++ b/dynamut/examples.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Fri Feb 12 12:15:26 2021 + +@author: tanu +""" +import os +homedir = os.path.expanduser('~') +os.chdir (homedir + '/git/LSHTM_analysis/dynamut') +from get_results import * +from submit import * +#%%##################################################################### +#EXAMPLE RUN for different stages +#===================== +# STAGE: submit.py +#===================== +my_host = 'http://biosig.unimelb.edu.au' +my_prediction_url = f"{host}/dynamut/prediction_list" +print(prediction_url) + +my_outdir = homedir + '/git/LSHTM_analysis/dynamut' +my_chain = 'A' +my_email = 'tanushree.tunstall@lshtm.ac.uk' + +my_pdb_file = homedir + '/git/Data/streptomycin/input/gid_complex.pdb' +my_mutation_list = homedir + '/git/LSHTM_analysis/dynamut/example_input/snp_test1.csv' +my_suffix = 'gid_test1' + +#---------------------------------------------- +# example 1: 2 snps in a file +#---------------------------------------------- +submit_dynamut(host_url = my_host +, pdb_file = my_pdb_file +, mutation_list = my_mutation_list +, chain = my_chain +, email_address = my_email +, prediction_url = my_prediction_url +, output_dir = my_outdir +, outfile_suffix = my_suffix) +#%%################################################################### +#===================== +# STAGE:get_results.py +#===================== +my_host = 'http://biosig.unimelb.edu.au' +my_outdir = homedir + '/git/LSHTM_analysis/dynamut' + +#---------------------------------------------- +# example 1: multiple urls in a single file +#---------------------------------------------- +my_url_file_multiple = homedir + '/git/LSHTM_analysis/dynamut/dynamut_temp/dynamut_result_url_batch_multiple.txt' +print(my_url_file_multiple) +my_suffix = 'multiple' + +get_results(url_file = my_url_file_multiple + , host_url = my_host + , output_dir = my_outdir + , outfile_suffix = my_suffix) + +#---------------------------------------------- +# example 2: single url in a file +#---------------------------------------------- +my_url_file_single = homedir + '/git/LSHTM_analysis/dynamut/dynamut_temp/dynamut_result_url_batch_single.txt' +print(my_url_file_multiple) +my_suffix = 'single' + +get_results(my_url_file_single + , host_url = my_host + , output_dir = my_outdir + , outfile_suffix = my_suffix) +#%%################################################################### diff --git a/dynamut/get_results.py b/dynamut/get_results.py old mode 100755 new mode 100644 index 5390e75..5ae343a --- a/dynamut/get_results.py +++ b/dynamut/get_results.py @@ -16,140 +16,68 @@ from bs4 import BeautifulSoup import pandas as pd from pandas.api.types import is_string_dtype from pandas.api.types import is_numeric_dtype -#%%============================================================================ -host = 'http://biosig.unimelb.edu.au' -pred_dynamut_batch = '/dynamut/results_prediction/161287964015' -batch_result_url = host + pred_dynamut_batch -batch_result_url - -# build a single url with a given mutation -result_id = re.search( r"([0-9]+)$", pred_dynamut_batch).group(0) -mut = 'S2C' -single_url = host + '/single_results/' + str(result_id) -single_result_url = host + '/single_results/' + str(result_id) + '/' + mut -print(single_result_url) - -#%%============================================================================ -param_dict = {} - -result_response = requests.get(single_result_url) -if result_response.status_code == 200: - print('Fetching results') - # extract results using the html parser - soup = BeautifulSoup(result_response.text, features = 'html.parser') - #web_result_raw = soup.find(id = 'predictions').get_text() - ddg_dynamut = soup.find(id = 'ddg_dynamut').get_text() - ddg_encom = soup.find(id = 'ddg_encom').get_text() - ddg_mcsm = soup.find(id = 'ddg_mcsm').get_text() - ddg_sdm = soup.find(id = 'ddg_sdm').get_text() - ddg_duet = soup.find(id = 'ddg_duet').get_text() - dds_encom = soup.find(id = 'dds_encom').get_text() - - param_dict = {"mutationinformation" : mut - , "ddg_dynamut" : ddg_dynamut - , "ddg_encom" : ddg_encom - , "ddg_mcsm" : ddg_mcsm - , "ddg_sdm" : ddg_sdm - , "ddg_duet" : ddg_duet - , "dds_encom" : dds_encom - - } - results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T - -print(results_df) - -#%% looping over mutation -single_url = host + '/single_results/' + str(result_id) -muts = ["S2C", "S2F"] - -# initilialise empty df -dynamut_results_df = pd.DataFrame() - -for i, mut in enumerate(muts): - #param_dict = {} - print('Running mutation', i+1, ':', mut) - snp = mut - single_result_url = single_url + '/' + snp - print('Getting results from:', single_result_url) +#%%##################################################################### +def get_results(url_file, host_url, output_dir, outfile_suffix): + # initilialise empty df + dynamut_results_out_df = pd.DataFrame() + with open(url_file, 'r') as f: + for count, line in enumerate(f): + line = line.strip() + print('URL no.', count+1, '\n', line) + #batch_response = requests.get(line, headers=headers) + batch_response = requests.get(line) + batch_soup = BeautifulSoup(batch_response.text, features = 'html.parser') + + # initilialise empty df + #dynamut_results_df = pd.DataFrame() + for a in batch_soup.find_all('a', href=True, attrs = {'class':'btn btn-default btn-sm'}): + print ("Found the URL:", a['href']) + single_result_url = host_url + a['href'] + snp = re.search(r'([A-Z]+[0-9]+[A-Z]+$)', single_result_url).group(0) + print(snp) + print('\nGetting results from:', single_result_url) + + result_response = requests.get(single_result_url) + if result_response.status_code == 200: + print('\nFetching results for SNP:', snp) + # extract results using the html parser + soup = BeautifulSoup(result_response.text, features = 'html.parser') + #web_result_raw = soup.find(id = 'predictions').get_text() + ddg_dynamut = soup.find(id = 'ddg_dynamut').get_text() + ddg_encom = soup.find(id = 'ddg_encom').get_text() + ddg_mcsm = soup.find(id = 'ddg_mcsm').get_text() + ddg_sdm = soup.find(id = 'ddg_sdm').get_text() + ddg_duet = soup.find(id = 'ddg_duet').get_text() + dds_encom = soup.find(id = 'dds_encom').get_text() + + param_dict = {"mutationinformation" : snp + , "ddg_dynamut" : ddg_dynamut + , "ddg_encom" : ddg_encom + , "ddg_mcsm" : ddg_mcsm + , "ddg_sdm" : ddg_sdm + , "ddg_duet" : ddg_duet + , "dds_encom" : dds_encom + } + results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T + print('Result DF:', results_df, 'for URL:', line) + #dynamut_results_df = dynamut_results_df.append(results_df)#!1 too many!:-) + dynamut_results_out_df = dynamut_results_out_df.append(results_df) + #print(dynamut_results_out_df) + #============================ + # Writing results file: csv + #============================ + dynamut_results_dir = output_dir + '/dynamut_results' + if not os.path.exists(dynamut_results_dir): + print('\nCreating dir: dynamut_results within:', output_dir ) + os.makedirs(dynamut_results_dir) + print('\nWriting dynamut results df') + print('\nResults File:' + , '\nNo. of rows:', dynamut_results_out_df.shape[0] + , '\nNo. of cols:', dynamut_results_out_df.shape[1]) + print(dynamut_results_out_df) + #dynamut_results_out_df.to_csv('/tmp/test_dynamut.csv', index = False) - result_response = requests.get(single_result_url) - if result_response.status_code == 200: - print('Fetching results') - # extract results using the html parser - soup = BeautifulSoup(result_response.text, features = 'html.parser') - #web_result_raw = soup.find(id = 'predictions').get_text() - ddg_dynamut = soup.find(id = 'ddg_dynamut').get_text() - ddg_encom = soup.find(id = 'ddg_encom').get_text() - ddg_mcsm = soup.find(id = 'ddg_mcsm').get_text() - ddg_sdm = soup.find(id = 'ddg_sdm').get_text() - ddg_duet = soup.find(id = 'ddg_duet').get_text() - dds_encom = soup.find(id = 'dds_encom').get_text() - - param_dict = {"mutationinformation" : snp - , "ddg_dynamut" : ddg_dynamut - , "ddg_encom" : ddg_encom - , "ddg_mcsm" : ddg_mcsm - , "ddg_sdm" : ddg_sdm - , "ddg_duet" : ddg_duet - , "dds_encom" : dds_encom - } - results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T - print(results_df) - dynamut_results_df = dynamut_results_df.append(results_df) - print(dynamut_results_df) - -#%% Derive the single url from the batch result itself -# get request from a batch url -# corresponding to href -batch_result_url -batch_response = requests.get(batch_result_url) -batch_soup = BeautifulSoup(batch_response.text, features = 'html.parser') -print(batch_soup) -#table = batch_soup.find('table', attrs = {'class':'table table-striped table-bordered table-responsive'}) -#btn = batch_soup.find_all(href = True, attrs = {'class':'btn btn-default btn-sm'}) -#print(btn) - - -# initilialise empty df -dynamut_results_df = pd.DataFrame() -for a in batch_soup.find_all('a', href=True, attrs = {'class':'btn btn-default btn-sm'}): - print ("Found the URL:", a['href']) - single_result_url = host + a['href'] - snp = re.search(r'([A-Z]+[0-9]+[A-Z]+$)', single_result_url).group(0) - print(snp) - print('\nGetting results from:', single_result_url) - - result_response = requests.get(single_result_url) - if result_response.status_code == 200: - print('\nFetching results for SNP:', snp) - # extract results using the html parser - soup = BeautifulSoup(result_response.text, features = 'html.parser') - #web_result_raw = soup.find(id = 'predictions').get_text() - ddg_dynamut = soup.find(id = 'ddg_dynamut').get_text() - ddg_encom = soup.find(id = 'ddg_encom').get_text() - ddg_mcsm = soup.find(id = 'ddg_mcsm').get_text() - ddg_sdm = soup.find(id = 'ddg_sdm').get_text() - ddg_duet = soup.find(id = 'ddg_duet').get_text() - dds_encom = soup.find(id = 'dds_encom').get_text() - - param_dict = {"mutationinformation" : snp - , "ddg_dynamut" : ddg_dynamut - , "ddg_encom" : ddg_encom - , "ddg_mcsm" : ddg_mcsm - , "ddg_sdm" : ddg_sdm - , "ddg_duet" : ddg_duet - , "dds_encom" : dds_encom - } - results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T - print(results_df) - dynamut_results_df = dynamut_results_df.append(results_df) - print(dynamut_results_df) - print('\nWriting dynamut results df') - dynamut_results_df.to_csv('test_dynamut.csv', index = False) - print('\nResults File:' - , '\nNo. of rows:', dynamut_results_df.shape[0] - , '\nNo. of cols:', dynamut_results_df.shape[1]) - - - - + # build out filename + out_filename = dynamut_results_dir + '/dynamut_output_' + outfile_suffix + '.csv' + dynamut_results_out_df.to_csv(out_filename, index = False) +#%%##################################################################### diff --git a/dynamut/get_results_def.py b/dynamut/get_results_def.py deleted file mode 100644 index 3ac86b0..0000000 --- a/dynamut/get_results_def.py +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Wed Aug 19 14:33:51 2020 - -@author: tanu -""" -#%% load packages -import os,sys -import subprocess -import argparse -import requests -import re -import time -from bs4 import BeautifulSoup -import pandas as pd -from pandas.api.types import is_string_dtype -from pandas.api.types import is_numeric_dtype -#%% -def get_results(url_file, host_url, output_dir, outfile_suffix): - # initilialise empty df - dynamut_results_out_df = pd.DataFrame() - with open(url_file, 'r') as f: - for count, line in enumerate(f): - line = line.strip() - print('URL no.', count+1, '\n', line) - #batch_response = requests.get(line, headers=headers) - batch_response = requests.get(line) - batch_soup = BeautifulSoup(batch_response.text, features = 'html.parser') - - # initilialise empty df - #dynamut_results_df = pd.DataFrame() - for a in batch_soup.find_all('a', href=True, attrs = {'class':'btn btn-default btn-sm'}): - print ("Found the URL:", a['href']) - single_result_url = host_url + a['href'] - snp = re.search(r'([A-Z]+[0-9]+[A-Z]+$)', single_result_url).group(0) - print(snp) - print('\nGetting results from:', single_result_url) - - result_response = requests.get(single_result_url) - if result_response.status_code == 200: - print('\nFetching results for SNP:', snp) - # extract results using the html parser - soup = BeautifulSoup(result_response.text, features = 'html.parser') - #web_result_raw = soup.find(id = 'predictions').get_text() - ddg_dynamut = soup.find(id = 'ddg_dynamut').get_text() - ddg_encom = soup.find(id = 'ddg_encom').get_text() - ddg_mcsm = soup.find(id = 'ddg_mcsm').get_text() - ddg_sdm = soup.find(id = 'ddg_sdm').get_text() - ddg_duet = soup.find(id = 'ddg_duet').get_text() - dds_encom = soup.find(id = 'dds_encom').get_text() - - param_dict = {"mutationinformation" : snp - , "ddg_dynamut" : ddg_dynamut - , "ddg_encom" : ddg_encom - , "ddg_mcsm" : ddg_mcsm - , "ddg_sdm" : ddg_sdm - , "ddg_duet" : ddg_duet - , "dds_encom" : dds_encom - } - results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T - print('Result DF:', results_df, 'for URL:', line) - #dynamut_results_df = dynamut_results_df.append(results_df)#!1 too many!:-) - dynamut_results_out_df = dynamut_results_out_df.append(results_df) - #print(dynamut_results_out_df) - #============================ - # Writing results file: csv - #============================ - dynamut_results_dir = output_dir + '/dynamut_results' - if not os.path.exists(dynamut_results_dir): - print('\nCreating dir: dynamut_results within:', output_dir ) - os.makedirs(dynamut_results_dir) - print('\nWriting dynamut results df') - print('\nResults File:' - , '\nNo. of rows:', dynamut_results_out_df.shape[0] - , '\nNo. of cols:', dynamut_results_out_df.shape[1]) - print(dynamut_results_out_df) - #dynamut_results_out_df.to_csv('/tmp/test_dynamut.csv', index = False) - - # build out filename - out_filename = dynamut_results_dir + '/dynamut_output_' + outfile_suffix + '.csv' - dynamut_results_out_df.to_csv(out_filename, index = False) -#%% EXAMPLE RUN -# globals -#homedir = os.path.expanduser('~') -#my_host = 'http://biosig.unimelb.edu.au' -#my_outdir = homedir + '/git/LSHTM_analysis/dynamut' - -#============================================= -# example 1: multiple urls in a single file -#============================================= -#my_url_file_multiple = homedir + '/git/LSHTM_analysis/dynamut/dynamut_temp/dynamut_result_url_batch_multiple.txt' -#print(my_url_file_multiple) -#get_results(url_file = my_url_file_multiple -# , host_url = my_host -# , output_dir = my_outdir -# , outfile_suffix='multiple') - -#============================================= -# example 2: single url in a file -#============================================= -#my_url_file_single = homedir + '/git/LSHTM_analysis/dynamut/dynamut_temp/dynamut_result_url_batch_single.txt' -#print(my_url_file_multiple) -#get_results(my_url_file_single -# , host_url = my_host -# , output_dir = my_outdir -# , outfile_suffix = 'single') -#%% diff --git a/dynamut/reading_muts.py b/dynamut/reading_muts.py deleted file mode 100755 index c03a0a1..0000000 --- a/dynamut/reading_muts.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Wed Aug 19 14:33:51 2020 - -@author: tanu -""" - - -#%% load packages -import os,sys -import subprocess -import argparse -import requests -import re -import time -from bs4 import BeautifulSoup -import pandas as pd -import numpy as np -from pandas.api.types import is_string_dtype -from pandas.api.types import is_numeric_dtype -#%%============================================================================ -# read mutation file - -all_muts = pd.read_csv("/home/tanu/git/Data/streptomycin/output/snp_batches/snps_batch_00", header=None) -#https://gist.github.com/jrivero/1085501 -n = 20 -chunks = [all_muts[i:i+n] for i in range(0,all_muts.shape[0],n)] -#%% - -muts_list = all_muts[0].values.tolist() - -host = 'http://biosig.unimelb.edu.au/dynamut' -mut_prediction = '/prediction' - -submit_url = host + mut_prediction diff --git a/dynamut/run_results.py b/dynamut/run_results.py new file mode 100644 index 0000000..429da5d --- /dev/null +++ b/dynamut/run_results.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed Aug 19 14:33:51 2020 + +@author: tanu +""" +#%% load packages +import os +homedir = os.path.expanduser('~') +os.chdir (homedir + '/git/LSHTM_analysis/dynamut') +from get_results import * +######################################################################## +# variables +my_host = 'http://biosig.unimelb.edu.au' +# Needed if things try to block the 'requests' user agent +#headers = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"} + +# TODO: add cmd line args +#gene = 'gid' +drug = 'streptomycin' +datadir = homedir + '/git/Data' +indir = datadir + '/' + drug + '/input' +outdir = datadir + '/' + drug + '/output' + +my_url_file = outdir + '/dynamut_temp/dynamut_result_url_gid_b1.txt' +my_suffix = 'gid_b1' +#========================== +# CALL: get_results() +# Data: gid+streptomycin +#========================== +print(my_url_file, 'suffix:', my_suffix) + +get_results(url_file = my_url_file + , host_url = my_host + , output_dir = outdir + , outfile_suffix = my_suffix) +######################################################################## diff --git a/dynamut/run_submit.py b/dynamut/run_submit.py new file mode 100644 index 0000000..45a7391 --- /dev/null +++ b/dynamut/run_submit.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed Aug 19 14:33:51 2020 + +@author: tanu +""" +#%% load packages +import os +homedir = os.path.expanduser('~') +os.chdir (homedir + '/git/LSHTM_analysis/dynamut') +from submit import * +######################################################################## +# variables +my_host = 'http://biosig.unimelb.edu.au' +my_prediction_url = f"{my_host}/dynamut/prediction_list" +print(my_prediction_url) + +# TODO: add cmd line args +#gene = 'gid' +drug = 'streptomycin' +datadir = homedir + '/git/Data' +indir = datadir + '/' + drug + '/input' +outdir = datadir + '/' + drug + '/output' + +my_chain = 'A' +my_email = 'tanushree.tunstall@lshtm.ac.uk' + +my_pdb_file = homedir + '/git/Data/streptomycin/input/gid_complex.pdb' + +# batch 1: 00.txt +#my_mutation_list =outdir + '/snp_batches/50/snp_batch_00.txt' +#my_suffix = 'gid_b1' +#RAN: 11 Feb, ~14:00 pm * RETRIEVED + +# batch 2: 01.txt +#my_mutation_list = outdir + '/snp_batches/50/snp_batch_01.txt' +#my_suffix = 'gid_b2' +#RAN: 12 Feb, ~10:00 am, AWAITING + +# batch 3: 02.txt +my_mutation_list = outdir + '/snp_batches/50/snp_batch_02.txt' +my_suffix = 'gid_b3' +#RAN: 12 Feb, ~12:40 pm, AWAITING + +#========================== +# CALL: submit_dynamut() +# Data: gid+streptomycin +#========================== +submit_dynamut(host_url = my_host + , pdb_file = my_pdb_file + , mutation_list = my_mutation_list + , chain = my_chain + , email_address = my_email + , prediction_url = my_prediction_url + , output_dir = outdir + , outfile_suffix = my_suffix) +#%%##################################################################### diff --git a/dynamut/submit.py b/dynamut/submit.py old mode 100755 new mode 100644 index 8741237..1a10c4a --- a/dynamut/submit.py +++ b/dynamut/submit.py @@ -16,106 +16,72 @@ from bs4 import BeautifulSoup import pandas as pd from pandas.api.types import is_string_dtype from pandas.api.types import is_numeric_dtype -#%% homedir -homedir = os.path.expanduser('~') -print('My homedir is:', homedir) - -#%% -host = 'http://biosig.unimelb.edu.au' -prediction_url = f"{host}/dynamut/prediction_list" -print(prediction_url) - -#%% example params -gene_name = 'gid' -drug = 'streptomycin' -datadir = homedir + '/git/Data' -indir = datadir + '/' + drug + '/input' -#outdir = datadir + '/' + drug + '/output' -outdir = homedir + '/git/LSHTM_analysis/dynamut' # for example - -dynamut_temp_dir = outdir + '/dynamut_temp' - -if not os.path.exists(dynamut_temp_dir): - print('Creating dynamut_temp in outdir', outdir ) - os.makedirs(dynamut_temp_dir) - -batch_no = 1 -out_url_file = dynamut_temp_dir + '/dynamut_result_url_batch_' + str(batch_no) + '.txt' - - -#%% request calculation (no def) -with open("/home/tanu/git/Data/streptomycin/input/gid_complex.pdb", "rb") as pdb_file, open ("/home/tanu/git/LSHTM_analysis/dynamut/snp_test2.csv", "rb") as mutation_list: - files = {"wild": pdb_file - , "mutation_list": mutation_list} - body = {"chain": 'A' - , "email": 'tanushree.tunstall@lshtm.ac.uk'} - - response = requests.post(prediction_url, files = files, data = body) - print(response.status_code) - if response.history: - print('PASS: valid mutation submitted. Fetching result url') - url_match = re.search('/dynamut/results_prediction/.+(?=")', response.text) - url = host + url_match.group() - print(url) - - #=============== - # writing file: result urls - #=============== - out_url_file = dynamut_temp_dir + '/dynamut_result_url_batch_' + str(batch_no) + '.txt' - print('Writing output url file:', out_url_file) - myfile = open(out_url_file, 'a') - myfile.write(url) - myfile.close() - -#%% -def request_calculation(pdb_file, mutation_list +#%%##################################################################### +def submit_dynamut(host_url + , pdb_file + , mutation_list , chain - , my_email + , email_address , prediction_url , output_dir - , gene_name - , batch_no - , out_url_file): + , outfile_suffix + ): """ - Makes a POST request for a ligand affinity prediction. + Makes a POST request for dynamut predictions. + + @param host_url: valid host url for submitting the job + @type string @param pdb_file: valid path to pdb structure @type string @param mutation_list: list of mutations (1 per line) of the format: {WT}{Mut} @type string - + @param chain: single-letter(caps) @type chr @param prediction_url: dynamut url for prediction @type string - @return txt file containing batch no. of snps processed + @param output_dir: output dir + @type string + + @param outfile_suffix: outfile_suffix + @type string, default is batch no. + + @param outfile_suffix: to append to outfile + @type string + + @return writes a .txt file containing url for the snps processed with user provided suffix in filename @type string """ - with open(pdb_file, "rb") as pdb_file, open (mutation_list) as mutation_list: + with open(pdb_file, "rb") as pdb_file, open (mutation_list, "rb") as mutation_list: files = {"wild": pdb_file , "mutation_list": mutation_list} - body = {"chain": 'A' - , "email": 'tanushree.tunstall@lshtm.ac.uk'} + body = {"chain": chain + , "email": email_address} response = requests.post(prediction_url, files = files, data = body) print(response.status_code) if response.history: - print('PASS: valid mutation submitted. Fetching result url') + print('\nPASS: valid submission. Fetching result url') url_match = re.search('/dynamut/results_prediction/.+(?=")', response.text) - url = host + url_match.group() - print(url) + url = host_url + url_match.group() + print('\nURL for snp batch no ', str(outfile_suffix), ':', url) #=============== # writing file: result urls #=============== - out_url_file = dynamut_temp_dir + '/dynamut_result_url_batch_' + str(batch_no) + '.txt' - print('Writing output url file:', out_url_file) + dynamut_temp_dir = output_dir + '/dynamut_temp' # creates a temp dir within output_dir + if not os.path.exists(dynamut_temp_dir): + print('\nCreating dynamut_temp in output_dir', output_dir ) + os.makedirs(dynamut_temp_dir) + + out_url_file = dynamut_temp_dir + '/dynamut_result_url_' + str(outfile_suffix) + '.txt' + print('\nWriting output url file:', out_url_file) myfile = open(out_url_file, 'a') myfile.write(url) myfile.close() -#==================== -# Submit first batch +#%%##################################################################### diff --git a/dynamut/submit_def.py b/dynamut/submit_def.py deleted file mode 100644 index 1412ae2..0000000 --- a/dynamut/submit_def.py +++ /dev/null @@ -1,121 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Wed Aug 19 14:33:51 2020 - -@author: tanu -""" -#%% load packages -import os,sys -import subprocess -import argparse -import requests -import re -import time -from bs4 import BeautifulSoup -import pandas as pd -from pandas.api.types import is_string_dtype -from pandas.api.types import is_numeric_dtype -#%% homedir -homedir = os.path.expanduser('~') -print('My homedir is:', homedir) -#%% -def request_calculation(pdb_file - , mutation_list - , batch_no - , chain - , my_email - , prediction_url - , output_dir - #, gene_name - #, out_url_file - ): - """ - Makes a POST request for dynamut predictions. - - @param pdb_file: valid path to pdb structure - @type string - - @param mutation_list: list of mutations (1 per line) of the format: {WT}{Mut} - @type string - - @param batch_no: batch no so it can be added as a suffix to the the outfile - @type int - - @param chain: single-letter(caps) - @type chr - - @param prediction_url: dynamut url for prediction - @type string - - @param output_dir: output dir - @type string - - @param gene_name: name of gene - @type string - - #@param out_url_file: name of output file with batch no. as suffix - @type string - - @return txt file containing batch no. of snps processed (i.e out_url_file) - @type string - """ - - with open(pdb_file, "rb") as pdb_file, open (mutation_list, "rb") as mutation_list: - files = {"wild": pdb_file - , "mutation_list": mutation_list} - body = {"chain": chain - , "email": my_email} - - response = requests.post(prediction_url, files = files, data = body) - print(response.status_code) - if response.history: - print('\nPASS: valid submission. Fetching result url') - url_match = re.search('/dynamut/results_prediction/.+(?=")', response.text) - url = host + url_match.group() - print('\nURL for snp batch no ', str(batch_no), ':', url) - - #=============== - # writing file: result urls - #=============== - dynamut_temp_dir = outdir + '/dynamut_temp' - if not os.path.exists(dynamut_temp_dir): - print('\nCreating dynamut_temp in outdir', outdir ) - os.makedirs(dynamut_temp_dir) - - out_url_file = dynamut_temp_dir + '/dynamut_result_url_batch_' + str(batch_no) + '.txt' - print('\nWriting output url file:', out_url_file) - myfile = open(out_url_file, 'a') - myfile.write(url) - myfile.close() -#%%globals!? -host = 'http://biosig.unimelb.edu.au' -#prediction_url = f"{host}/dynamut/prediction_list" -#print(prediction_url) - -#gene = 'gid' -drug = 'streptomycin' -datadir = homedir + '/git/Data' -indir = datadir + '/' + drug + '/input' -outdir = datadir + '/' + drug + '/output' -#outdir = homedir + '/git/LSHTM_analysis/dynamut' # for example - -my_chain = 'A' -my_pdb_file = homedir + '/git/Data/streptomycin/input/gid_complex.pdb' - -# batch 1: 00.txt -#my_mutation_list = homedir + '/git/Data/streptomycin/output/snp_batches/50/snp_batch_00.txt' -#my_batch = 1 - -# batch 2: 01.txt -my_mutation_list = outdir + '/snp_batches/50/snp_batch_01.txt' -my_batch = 2 - -# %% call this function -request_calculation (pdb_file = my_pdb_file -, mutation_list = my_mutation_list -, chain = my_chain -, my_email = 'tanushree.tunstall@lshtm.ac.uk' -, prediction_url = f"{host}/dynamut/prediction_list" -, output_dir = outdir -, batch_no = my_batch)