added mcsm_na scripts to submit batches of 20
This commit is contained in:
parent
7c84e8b044
commit
a6f1f65acf
7 changed files with 236 additions and 0 deletions
56
mcsm_na/examples.py
Normal file
56
mcsm_na/examples.py
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
Created on Fri Feb 12 12:15:26 2021
|
||||||
|
|
||||||
|
@author: tanu
|
||||||
|
"""
|
||||||
|
import os

homedir = os.path.expanduser('~')
# Change into the mcsm_na script directory before importing the sibling
# modules below (presumably so they resolve by bare name when the cells are
# run interactively -- confirm against how this script is launched).
os.chdir(homedir + '/git/LSHTM_analysis/mcsm_na')

# Explicit imports instead of wildcard imports: it stays obvious which module
# provides each function and nothing else leaks into this namespace.
from submit_mcsm_na import submit_mcsm_na
from get_results import get_results

#%%#####################################################################
# EXAMPLE RUN for different stages
#=====================
# STAGE: submit_mcsm_na.py
#=====================
my_host = 'http://biosig.unimelb.edu.au'
my_prediction_url = f"{my_host}/mcsm_na/run_prediction_list"
print(my_prediction_url)

my_outdir = homedir + '/git/LSHTM_analysis/mcsm_na'

my_pdb_file = homedir + '/git/Data/streptomycin/input/gid_complex.pdb'
my_mutation_list = homedir + '/git/LSHTM_analysis/mcsm_na/test_snps_b1.csv'
my_suffix = 'gid_test_b1'

#----------------------------------------------
# example 1: 2 snps in a file
#----------------------------------------------
# Posts the structure and the mutation list; the resulting url is written to
# <my_outdir>/mcsm_na_temp/mcsm_na_result_url_<my_suffix>.txt
submit_mcsm_na(host_url = my_host
               , pdb_file = my_pdb_file
               , mutation_list = my_mutation_list
               , nuc_type = 'RNA'
               , prediction_url = my_prediction_url
               , output_dir = my_outdir
               , outfile_suffix = my_suffix)
#%%###################################################################

#=====================
# STAGE: get_results.py
#=====================
# Re-defined here so this cell does not depend on the submit cell above.
my_host = 'http://biosig.unimelb.edu.au'
my_outdir = homedir + '/git/LSHTM_analysis/mcsm_na'

#----------------------------------------------
# example 1: single url in a single file
#----------------------------------------------
# The url file is the one written by the submit stage above.
my_url_file_single = homedir + '/git/LSHTM_analysis/mcsm_na/mcsm_na_temp/mcsm_na_result_url_gid_test_b1.txt'
print(my_url_file_single)
my_suffix = 'single'

get_results(url_file = my_url_file_single
            , host_url = my_host
            , output_dir = my_outdir
            , outfile_suffix = my_suffix)
|
53
mcsm_na/get_results.py
Normal file
53
mcsm_na/get_results.py
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
Created on Wed Aug 19 14:33:51 2020
|
||||||
|
|
||||||
|
@author: tanu
|
||||||
|
"""
|
||||||
|
#%% load packages
|
||||||
|
import os,sys
|
||||||
|
import subprocess
|
||||||
|
import argparse
|
||||||
|
import requests
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import pandas as pd
|
||||||
|
from pandas.api.types import is_string_dtype
|
||||||
|
from pandas.api.types import is_numeric_dtype
|
||||||
|
#%%#####################################################################
|
||||||
|
|
||||||
|
def get_results(url_file, host_url, output_dir, outfile_suffix):
    """
    Downloads the mcsm_na .txt result file for every result url listed in a file.

    @param url_file: path to a text file with one mcsm_na result url per line;
        each url must end in the prediction number, i.e. <digits>.<digits>
    @type string

    @param host_url: host url used to build the download url of the .txt result
    @type string

    @param output_dir: output dir; results go into <output_dir>/mcsm_na_results
    @type string

    @param outfile_suffix: text placed at the start of every output filename
    @type string

    @return None. Writes one file per url:
        <output_dir>/mcsm_na_results/<outfile_suffix>_output_<prediction number>.txt
    """
    mcsm_na_results_dir = output_dir + '/mcsm_na_results'

    with open(url_file, 'r') as url_fh:
        for count, line in enumerate(url_fh):
            line = line.strip()
            if not line:
                # blank line (e.g. trailing newline at the end of the file)
                continue
            print('URL no.', count+1, '\n', line)

            #============================
            # Writing results file: csv
            #============================
            if not os.path.exists(mcsm_na_results_dir):
                print('\nCreating dir: mcsm_na_results within:', output_dir )
            # exist_ok avoids failing if the dir appears between check and creation
            os.makedirs(mcsm_na_results_dir, exist_ok = True)

            # TODO: add as a cmd option
            # The prediction number is the trailing <digits>.<digits> of the url
            number_match = re.search(r'([0-9]+\.[0-9]+$)', line)
            if number_match is None:
                # previously this crashed with AttributeError on .group()
                print('\nSKIPPING: no prediction number found in url:', line)
                continue
            prediction_number = number_match.group(0)
            print('CHECK prediction no:', prediction_number)

            txt_url = f"{host_url}/mcsm_na/static/results/" + prediction_number + '.txt'
            print('CHECK txt url:', txt_url)

            out_filename = mcsm_na_results_dir + '/' + outfile_suffix + '_output_' + prediction_number + '.txt'
            response_txt = requests.get(txt_url)
            if response_txt.status_code == 200:
                print('\nDownloading .txt:', txt_url
                      , '\n\nSaving file as:', out_filename)
                # .content is the body with gzip/deflate encoding already decoded;
                # .raw.read() would save the still-compressed bytes.
                # A separate handle name keeps the url file handle unshadowed.
                with open(out_filename, 'wb') as out_fh:
                    out_fh.write(response_txt.content)
            else:
                print('\nFAIL: could not download:', txt_url
                      , '\nStatus code:', response_txt.status_code)
|
||||||
|
|
||||||
|
#%%#####################################################################
|
||||||
|
|
BIN
mcsm_na/mcsm_na_results/single_output_1613147445.16.txt
Normal file
BIN
mcsm_na/mcsm_na_results/single_output_1613147445.16.txt
Normal file
Binary file not shown.
1
mcsm_na/mcsm_na_temp/mcsm_na_result_url_gid_test_b1.txt
Normal file
1
mcsm_na/mcsm_na_temp/mcsm_na_result_url_gid_test_b1.txt
Normal file
|
@ -0,0 +1 @@
|
||||||
|
http://biosig.unimelb.edu.au/mcsm_na/results_prediction/1613147445.16
|
40
mcsm_na/run_submit.py
Executable file
40
mcsm_na/run_submit.py
Executable file
|
@ -0,0 +1,40 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
Created on Fri Feb 12 12:15:26 2021
|
||||||
|
|
||||||
|
@author: tanu
|
||||||
|
"""
|
||||||
|
import os

homedir = os.path.expanduser('~')
# Change into the mcsm_na script directory before importing the sibling
# module below (presumably so it resolves by bare name -- confirm against how
# this script is launched).
os.chdir(homedir + '/git/LSHTM_analysis/mcsm_na')

# The submit function lives in submit_mcsm_na.py; there is no 'submit' module
# among the mcsm_na scripts.
from submit_mcsm_na import submit_mcsm_na

#%%#####################################################################
# EXAMPLE RUN for different stages
#=====================
# STAGE: submit_mcsm_na.py
#=====================
my_host = 'http://biosig.unimelb.edu.au'
# submit_mcsm_na always posts pred_type 'list' together with a mutation list
# file, so the list endpoint is used here (same as in examples.py).
my_prediction_url = f"{my_host}/mcsm_na/run_prediction_list"
print(my_prediction_url)

my_outdir = homedir + '/git/LSHTM_analysis/mcsm_na'

my_pdb_file = homedir + '/git/Data/streptomycin/input/gid_complex.pdb'
# NOTE(review): confirm this file exists; the test file used by examples.py
# is test_snps_b1.csv.
my_mutation_list = homedir + '/git/LSHTM_analysis/mcsm_na/input_snp_test_b1.csv'
my_suffix = 'gid_test1'

#----------------------------------------------
# example 1: 2 snps in a file
#----------------------------------------------
# submit_mcsm_na takes nuc_type and has no chain / email_address parameters.
# NOTE(review): 'RNA' matches what examples.py uses for this pdb file -- confirm.
submit_mcsm_na(host_url = my_host
               , pdb_file = my_pdb_file
               , mutation_list = my_mutation_list
               , nuc_type = 'RNA'
               , prediction_url = my_prediction_url
               , output_dir = my_outdir
               , outfile_suffix = my_suffix)
|
||||||
|
#%%###################################################################
|
||||||
|
|
84
mcsm_na/submit_mcsm_na.py
Normal file
84
mcsm_na/submit_mcsm_na.py
Normal file
|
@ -0,0 +1,84 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
Created on Wed Aug 19 14:33:51 2020
|
||||||
|
|
||||||
|
@author: tanu
|
||||||
|
"""
|
||||||
|
#%% load packages
|
||||||
|
import os,sys
|
||||||
|
import subprocess
|
||||||
|
import argparse
|
||||||
|
import requests
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import pandas as pd
|
||||||
|
from pandas.api.types import is_string_dtype
|
||||||
|
from pandas.api.types import is_numeric_dtype
|
||||||
|
#%%#####################################################################
|
||||||
|
def submit_mcsm_na(host_url
                   , pdb_file
                   , mutation_list
                   , nuc_type
                   , prediction_url
                   , output_dir
                   , outfile_suffix
                   ):
    """
    Makes a POST request for mcsm_na predictions.

    @param host_url: valid host url for submitting the job
    @type string

    @param pdb_file: valid path to pdb structure
    @type string

    @param mutation_list: path to a file listing mutations (1 per line) of the
        format: {chain} {WT}<POS>{Mut}, e.g. A X1Z
    @type string

    @param nuc_type: Nucleic acid type
    @type string

    @param prediction_url: mcsm_na url for prediction
    @type string

    @param output_dir: output dir
    @type string

    @param outfile_suffix: outfile_suffix
    @type string

    @return None. On a successful submission, appends the result url (one per
        line) to <output_dir>/mcsm_na_temp/mcsm_na_result_url_<outfile_suffix>.txt
    """
    # Separate handle names so the path parameters are not rebound to file objects
    with open(pdb_file, "rb") as pdb_fh, open(mutation_list, "rb") as mutation_fh:
        files = {"wild": pdb_fh
                 , "mutation_list": mutation_fh}
        body = {"na_type": nuc_type
                , "pred_type": 'list'
                , "pdb_code": ''} # apparently needs it even though blank!

        response = requests.post(prediction_url, files = files, data = body)

    print(response.status_code)
    # response.history is non-empty only when the request was redirected; this
    # code treats a redirect as the sign of an accepted submission.
    if not response.history:
        print('\nFAIL: submission was not redirected to a result page, no url written')
        return

    print('\nPASS: valid submission. Fetching result url')
    # [^"]+ stops at the first closing quote, so text after the link on the
    # same line cannot be captured into the url
    url_match = re.search(r'/mcsm_na/results_prediction/[^"]+(?=")', response.text)
    if url_match is None:
        # previously this crashed with AttributeError on .group()
        print('\nFAIL: could not find a result url in the response')
        return
    url = host_url + url_match.group()
    print('\nURL for snp batch no ', str(outfile_suffix), ':', url)

    #===============
    # writing file: result urls
    #===============
    mcsm_na_temp_dir = output_dir + '/mcsm_na_temp' # creates a temp dir within output_dir
    if not os.path.exists(mcsm_na_temp_dir):
        print('\nCreating mcsm_na_temp in output_dir', output_dir )
    # exist_ok avoids failing if the dir appears between check and creation
    os.makedirs(mcsm_na_temp_dir, exist_ok = True)

    out_url_file = mcsm_na_temp_dir + '/mcsm_na_result_url_' + str(outfile_suffix) + '.txt'
    print('\nWriting output url file:', out_url_file)
    # Append mode keeps urls from earlier submissions with the same suffix; the
    # trailing newline keeps them one per line so they can be read back line by line.
    with open(out_url_file, 'a') as myfile:
        myfile.write(url + '\n')
|
||||||
|
#%%#####################################################################
|
2
mcsm_na/test_snps_b1.csv
Normal file
2
mcsm_na/test_snps_b1.csv
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
A P3S
|
||||||
|
A I4N
|
|
Loading…
Add table
Add a link
Reference in a new issue