# ==== file: mcsm_na/examples.py ====
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Example run of the two mcsm_na stages: submitting a job, then fetching results.

Created on Fri Feb 12 12:15:26 2021

@author: tanu
"""
import os

homedir = os.path.expanduser('~')

# The stage modules are imported by bare name, so the working directory must
# be the mcsm_na dir before the imports below are executed.
os.chdir(homedir + '/git/LSHTM_analysis/mcsm_na')
from submit_mcsm_na import *
from get_results import *

#%%#####################################################################
# EXAMPLE RUN for different stages
#=====================
# STAGE: submit_mcsm_na.py
#=====================
my_host = 'http://biosig.unimelb.edu.au'
my_prediction_url = f"{my_host}/mcsm_na/run_prediction_list"
print(my_prediction_url)

my_outdir = homedir + '/git/LSHTM_analysis/mcsm_na'

my_pdb_file = homedir + '/git/Data/streptomycin/input/gid_complex.pdb'
my_mutation_list = homedir + '/git/LSHTM_analysis/mcsm_na/test_snps_b1.csv'
my_suffix = 'gid_test_b1'

#----------------------------------------------
# example 1: 2 snps in a file
#----------------------------------------------
submit_mcsm_na(
    host_url=my_host,
    pdb_file=my_pdb_file,
    mutation_list=my_mutation_list,
    nuc_type='RNA',
    prediction_url=my_prediction_url,
    output_dir=my_outdir,
    outfile_suffix=my_suffix,
)

#%%#####################################################################
#=====================
# STAGE: get_results.py
#=====================
# host and output dir are set again so this cell does not rely on the values
# assigned in the submit stage above
my_host = 'http://biosig.unimelb.edu.au'
my_outdir = homedir + '/git/LSHTM_analysis/mcsm_na'

#----------------------------------------------
# example 1: single url in a single file
#----------------------------------------------
my_url_file_single = homedir + '/git/LSHTM_analysis/mcsm_na/mcsm_na_temp/mcsm_na_result_url_gid_test_b1.txt'
print(my_url_file_single)
my_suffix = 'single'

get_results(
    url_file=my_url_file_single,
    host_url=my_host,
    output_dir=my_outdir,
    outfile_suffix=my_suffix,
)
# ==== file: mcsm_na/get_results.py ====
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 19 14:33:51 2020

@author: tanu
"""
#%% load packages
import os,sys
import subprocess
import argparse
import requests
import re
import time
from bs4 import BeautifulSoup
import pandas as pd
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype
#%%#####################################################################

def get_results(url_file, host_url, output_dir, outfile_suffix):
    """
    Downloads the mcsm_na .txt result file for every result url in a file.

    @param url_file: path to a text file with one result url per line; the
                     prediction number (digits.digits) is taken from the end
                     of each url
    @type string

    @param host_url: host url used to build the download url of the results
    @type string

    @param output_dir: dir within which the 'mcsm_na_results' dir is created
    @type string

    @param outfile_suffix: tag placed at the start of each output filename
    @type string

    @return None. For every url whose download returns HTTP 200, writes
            <output_dir>/mcsm_na_results/<outfile_suffix>_output_<prediction number>.txt
    """
    mcsm_na_results_dir = output_dir + '/mcsm_na_results'

    with open(url_file, 'r') as url_fh:
        #============================
        # Output dir for results: created once, not per url
        #============================
        if not os.path.exists(mcsm_na_results_dir):
            print('\nCreating dir: mcsm_na_results within:', output_dir )
            os.makedirs(mcsm_na_results_dir)

        for count, line in enumerate(url_fh):
            line = line.strip()
            if not line:
                # blank line (e.g. trailing newline): nothing to download
                continue
            print('URL no.', count+1, '\n', line)

            # the prediction number is the trailing <digits>.<digits> of the url
            number_match = re.search(r'([0-9]+\.[0-9]+$)', line)
            if number_match is None:
                print('\nSKIPPING: no prediction number found in url:', line)
                continue
            prediction_number = number_match.group(0)
            print('CHECK prediction no:', prediction_number)

            # TODO: add as a cmd option
            # Download .txt results file
            txt_url = f"{host_url}/mcsm_na/static/results/" + prediction_number + '.txt'
            print('CHECK txt url:', txt_url)

            out_filename = mcsm_na_results_dir + '/' + outfile_suffix + '_output_' + prediction_number + '.txt'
            response_txt = requests.get(txt_url)
            if response_txt.status_code == 200:
                print('\nDownloading .txt:', txt_url
                      , '\n\nSaving file as:', out_filename)
                # .content has any gzip/deflate content-encoding removed;
                # response.raw.read() would save the still-encoded bytes and
                # leave an unreadable binary file on disk.
                with open(out_filename, 'wb') as out_fh:
                    out_fh.write(response_txt.content)
            else:
                print('\nFAIL: could not download:', txt_url
                      , '\nstatus code:', response_txt.status_code)

#%%#####################################################################
# ==== file: mcsm_na/mcsm_na_results/single_output_1613147445.16.txt ====
# (binary file, content not shown)
# ==== file: mcsm_na/mcsm_na_temp/mcsm_na_result_url_gid_test_b1.txt ====
# http://biosig.unimelb.edu.au/mcsm_na/results_prediction/1613147445.16
# ==== file: mcsm_na/run_submit.py ====
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Example run of the mcsm_na submit stage.

Created on Fri Feb 12 12:15:26 2021

@author: tanu
"""
import os
homedir = os.path.expanduser('~')
# the stage module is imported by bare name, so run from the mcsm_na dir
os.chdir(homedir + '/git/LSHTM_analysis/mcsm_na')
# submit_mcsm_na() is defined in submit_mcsm_na.py
from submit_mcsm_na import *

#%%#####################################################################
#EXAMPLE RUN for different stages
#=====================
# STAGE: submit_mcsm_na.py
#=====================
my_host = 'http://biosig.unimelb.edu.au'
# list endpoint: submit_mcsm_na() posts a mutation-list file with pred_type 'list'
my_prediction_url = f"{my_host}/mcsm_na/run_prediction_list"
print(my_prediction_url)

my_outdir = homedir + '/git/LSHTM_analysis/mcsm_na'

my_pdb_file = homedir + '/git/Data/streptomycin/input/gid_complex.pdb'
# NOTE(review): confirm this input file exists; the file name is kept as given
my_mutation_list = homedir + '/git/LSHTM_analysis/mcsm_na/input_snp_test_b1.csv'
my_suffix = 'gid_test1'

#----------------------------------------------
# example 1: 2 snps in a file
#----------------------------------------------
# submit_mcsm_na() takes nuc_type and has no chain/email_address parameters
# (the chain is part of each line of the mutation list).
submit_mcsm_na(host_url = my_host
               , pdb_file = my_pdb_file
               , mutation_list = my_mutation_list
               , nuc_type = 'RNA'
               , prediction_url = my_prediction_url
               , output_dir = my_outdir
               , outfile_suffix = my_suffix)
#%%###################################################################
# ==== file: mcsm_na/submit_mcsm_na.py ====
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 19 14:33:51 2020

@author: tanu
"""
#%% load packages
import os,sys
import subprocess
import argparse
import requests
import re
import time
from bs4 import BeautifulSoup
import pandas as pd
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype
#%%#####################################################################
def submit_mcsm_na(host_url
                   , pdb_file
                   , mutation_list
                   , nuc_type
                   , prediction_url
                   , output_dir
                   , outfile_suffix
                   ):
    """
    Makes a POST request for mcsm_na predictions.

    @param host_url: valid host url for submitting the job
    @type string

    @param pdb_file: valid path to pdb structure
    @type string

    @param mutation_list: path to a file of mutations (1 per line) of the
                          format: {chain} {WT}{position}{Mut}, e.g. 'A P3S'
    @type string

    @param nuc_type: Nucleic acid type (sent as 'na_type'), e.g. 'RNA'
    @type string

    @param prediction_url: mcsm_na url for prediction
    @type string

    @param output_dir: output dir
    @type string

    @param outfile_suffix: outfile_suffix
    @type string

    @return None. Appends the result url (one per line) to
            <output_dir>/mcsm_na_temp/mcsm_na_result_url_<outfile_suffix>.txt

    @raise RuntimeError: if the submission was not redirected to a result
                         page, or no result url is found in the response
    """
    # distinct handle names so the path arguments are not overwritten
    with open(pdb_file, "rb") as pdb_fh, open(mutation_list, "rb") as mutation_fh:
        files = {"wild": pdb_fh
                 , "mutation_list": mutation_fh}
        body = {"na_type": nuc_type
                , "pred_type": 'list'
                , "pdb_code": ''} # apparently needs it even though blank!

        response = requests.post(prediction_url, files = files, data = body)
    print(response.status_code)

    # a redirected response (non-empty history) is treated as a valid submission
    if not response.history:
        raise RuntimeError('FAIL: submission was not accepted (no redirect), status code: '
                           + str(response.status_code))

    print('\nPASS: valid submission. Fetching result url')
    # stop at the first closing quote so only the url itself is captured
    url_match = re.search(r'/mcsm_na/results_prediction/[^"]+(?=")', response.text)
    if url_match is None:
        raise RuntimeError('FAIL: no result url found in the response from: '
                           + str(prediction_url))
    url = host_url + url_match.group()
    print('\nURL for snp batch no ', str(outfile_suffix), ':', url)

    #===============
    # writing file: result urls
    #===============
    mcsm_na_temp_dir = output_dir + '/mcsm_na_temp' # creates a temp dir within output_dir
    if not os.path.exists(mcsm_na_temp_dir):
        print('\nCreating mcsm_na_temp in output_dir', output_dir )
        os.makedirs(mcsm_na_temp_dir)

    out_url_file = mcsm_na_temp_dir + '/mcsm_na_result_url_' + str(outfile_suffix) + '.txt'
    print('\nWriting output url file:', out_url_file)
    # append mode: end each url with a newline so repeated submissions with
    # the same suffix give one url per line instead of one fused line
    with open(out_url_file, 'a') as url_fh:
        url_fh.write(url + '\n')
#%%#####################################################################

# ==== file: mcsm_na/test_snps_b1.csv ====
# A P3S
# A I4N