added mcsm_na scripts to submit batches of 20

This commit is contained in:
Tanushree Tunstall 2021-02-12 16:51:41 +00:00
parent 7c84e8b044
commit a6f1f65acf
7 changed files with 236 additions and 0 deletions

56
mcsm_na/examples.py Normal file
View file

@ -0,0 +1,56 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 12 12:15:26 2021
@author: tanu
"""
import os

homedir = os.path.expanduser('~')
# All paths below are anchored at the mcsm_na directory of the project checkout.
mcsm_na_dir = homedir + '/git/LSHTM_analysis/mcsm_na'
# NOTE(review): presumably the chdir is what lets the sibling modules resolve
# when the cells are run interactively -- confirm before removing it.
os.chdir(mcsm_na_dir)

# Explicit imports (rather than `import *`) make it obvious where each
# function comes from and keep unrelated module-level names out of this script.
from submit_mcsm_na import submit_mcsm_na
from get_results import get_results
#%%#####################################################################
#EXAMPLE RUN for different stages
#=====================
# STAGE: submit_mcsm_na.py
#=====================
my_host = 'http://biosig.unimelb.edu.au'
my_prediction_url = f"{my_host}/mcsm_na/run_prediction_list"
print(my_prediction_url)

my_outdir = mcsm_na_dir
my_pdb_file = homedir + '/git/Data/streptomycin/input/gid_complex.pdb'
my_mutation_list = mcsm_na_dir + '/test_snps_b1.csv'
my_suffix = 'gid_test_b1'

#----------------------------------------------
# example 1: 2 snps in a file
#----------------------------------------------
submit_mcsm_na(
    host_url = my_host,
    pdb_file = my_pdb_file,
    mutation_list = my_mutation_list,
    nuc_type = 'RNA',
    prediction_url = my_prediction_url,
    output_dir = my_outdir,
    outfile_suffix = my_suffix,
)
#%%###################################################################
#=====================
# STAGE: get_results.py
#=====================
# Re-assigned on purpose so this cell can be run without the one above.
my_host = 'http://biosig.unimelb.edu.au'
my_outdir = mcsm_na_dir

#----------------------------------------------
# example 1: single url in a single file
#----------------------------------------------
# This is the url file written by the submit stage above (suffix 'gid_test_b1').
my_url_file_single = mcsm_na_dir + '/mcsm_na_temp/mcsm_na_result_url_gid_test_b1.txt'
print(my_url_file_single)
my_suffix = 'single'

get_results(
    url_file = my_url_file_single,
    host_url = my_host,
    output_dir = my_outdir,
    outfile_suffix = my_suffix,
)

53
mcsm_na/get_results.py Normal file
View file

@ -0,0 +1,53 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 19 14:33:51 2020
@author: tanu
"""
#%% load packages
import os,sys
import subprocess
import argparse
import requests
import re
import time
from bs4 import BeautifulSoup
import pandas as pd
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype
#%%#####################################################################
def get_results(url_file, host_url, output_dir, outfile_suffix, timeout = 60):
    """
    Downloads the mcsm_na .txt result file for every result url listed in a file.

    @param url_file: path to a text file with one mcsm_na result url per line;
    each url must end in the prediction number (digits.digits)
    @type string
    @param host_url: host url the results are fetched from
    @type string
    @param output_dir: output dir; results are written to <output_dir>/mcsm_na_results
    @type string
    @param outfile_suffix: label used as the leading part of each output filename
    @type string
    @param timeout: seconds to wait for the server before giving up on a download
    @type number

    @return None. Writes <outfile_suffix>_output_<prediction number>.txt for every
    url that was downloaded successfully. Blank lines and lines without a
    prediction number are skipped, failed downloads are reported and skipped.
    """
    #============================
    # Output dir for result files
    #============================
    # Does not depend on the url, so it is prepared once before the loop.
    mcsm_na_results_dir = output_dir + '/mcsm_na_results'
    if not os.path.exists(mcsm_na_results_dir):
        print('\nCreating dir: mcsm_na_results within:', output_dir )
        os.makedirs(mcsm_na_results_dir, exist_ok = True)

    with open(url_file, 'r') as url_fh:
        for count, line in enumerate(url_fh):
            line = line.strip()
            if not line:
                # e.g. a trailing empty line in the url file
                continue
            print('URL no.', count+1, '\n', line)

            # The prediction number is the trailing <digits>.<digits> of the url
            prediction_match = re.search(r'([0-9]+\.[0-9]+$)', line)
            if prediction_match is None:
                print('\nWARNING: no prediction number found, skipping line:', line)
                continue
            prediction_number = prediction_match.group(0)
            print('CHECK prediction no:', prediction_number)

            # TODO: add as a cmd option
            # Download .txt file
            txt_url = f"{host_url}/mcsm_na/static/results/" + prediction_number + '.txt'
            print('CHECK txt url:', txt_url)

            out_filename = mcsm_na_results_dir + '/' + outfile_suffix + '_output_' + prediction_number + '.txt'

            response_txt = requests.get(txt_url, timeout = timeout)
            if response_txt.status_code == 200:
                print('\nDownloading .txt:', txt_url
                      , '\n\nSaving file as:', out_filename)
                # .content has any gzip/deflate transfer encoding already
                # decoded, whereas .raw.read() hands back the bytes as sent.
                with open(out_filename, 'wb') as out_fh:
                    out_fh.write(response_txt.content)
            else:
                print('\nFAIL: could not download:', txt_url
                      , '\nStatus code:', response_txt.status_code)
#%%#####################################################################

View file

@ -0,0 +1 @@
http://biosig.unimelb.edu.au/mcsm_na/results_prediction/1613147445.16

40
mcsm_na/run_submit.py Executable file
View file

@ -0,0 +1,40 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 12 12:15:26 2021
@author: tanu
"""
import os

homedir = os.path.expanduser('~')
# NOTE(review): presumably the chdir is what lets the sibling module resolve
# when the cells are run interactively -- confirm before removing it.
os.chdir(homedir + '/git/LSHTM_analysis/mcsm_na')

# submit_mcsm_na() is defined in submit_mcsm_na.py; import it by name.
from submit_mcsm_na import submit_mcsm_na
#%%#####################################################################
#EXAMPLE RUN for different stages
#=====================
# STAGE: submit_mcsm_na.py
#=====================
my_host = 'http://biosig.unimelb.edu.au'
# submit_mcsm_na() always posts pred_type 'list', so use the list endpoint.
my_prediction_url = f"{my_host}/mcsm_na/run_prediction_list"
print(my_prediction_url)

my_outdir = homedir + '/git/LSHTM_analysis/mcsm_na'
my_pdb_file = homedir + '/git/Data/streptomycin/input/gid_complex.pdb'
# NOTE(review): confirm this file exists; the sample mutation list added
# alongside this script is named test_snps_b1.csv.
my_mutation_list = homedir + '/git/LSHTM_analysis/mcsm_na/input_snp_test_b1.csv'
my_suffix = 'gid_test1'

#----------------------------------------------
# example 1: 2 snps in a file
#----------------------------------------------
# submit_mcsm_na() takes nuc_type; it has no chain / email_address parameters.
submit_mcsm_na(
    host_url = my_host,
    pdb_file = my_pdb_file,
    mutation_list = my_mutation_list,
    nuc_type = 'RNA',
    prediction_url = my_prediction_url,
    output_dir = my_outdir,
    outfile_suffix = my_suffix,
)
#%%###################################################################

84
mcsm_na/submit_mcsm_na.py Normal file
View file

@ -0,0 +1,84 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 19 14:33:51 2020
@author: tanu
"""
#%% load packages
import os,sys
import subprocess
import argparse
import requests
import re
import time
from bs4 import BeautifulSoup
import pandas as pd
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype
#%%#####################################################################
def submit_mcsm_na(host_url
                   , pdb_file
                   , mutation_list
                   , nuc_type
                   , prediction_url
                   , output_dir
                   , outfile_suffix
                   ):
    """
    Makes a POST request for mcsm_na predictions.

    @param host_url: valid host url for submitting the job
    @type string

    @param pdb_file: valid path to pdb structure
    @type string

    @param mutation_list: path to a file of mutations (1 per line) of the
    format: {chain} {WT}<POS>{Mut}, e.g. A X1Z
    @type string

    @param nuc_type: Nucleic acid type
    @type string

    @param prediction_url: mcsm_na url for prediction
    @type string

    @param output_dir: output dir
    @type string

    @param outfile_suffix: outfile_suffix
    @type string

    @return None. On a valid submission the result url is appended (one url
    per line) to <output_dir>/mcsm_na_temp/mcsm_na_result_url_<outfile_suffix>.txt.
    If the submission is not redirected, or no result url can be found in the
    response, a message is printed and nothing is written.
    """
    # Both files must stay open while the multipart request is being sent.
    with open(pdb_file, "rb") as pdb_fh, open(mutation_list, "rb") as mutation_fh:
        files = {"wild": pdb_fh
                 , "mutation_list": mutation_fh}
        body = {"na_type": nuc_type
                , "pred_type": 'list'
                , "pdb_code": ''} # apparently needs it even though blank!

        response = requests.post(prediction_url, files = files, data = body)

    print(response.status_code)

    # response.history is non-empty only when the request was redirected,
    # which is what the code treats as an accepted submission.
    if not response.history:
        print('\nFAIL: submission was not redirected to a results page.'
              , 'No url written for snp batch no ', str(outfile_suffix))
        return

    print('\nPASS: valid submission. Fetching result url')
    # [^"]+ stops at the first closing quote; a greedy .+ would run on to the
    # last quote of the line and could swallow trailing markup.
    url_match = re.search(r'/mcsm_na/results_prediction/[^"]+(?=")', response.text)
    if url_match is None:
        print('\nFAIL: no result url found in the response.'
              , 'No url written for snp batch no ', str(outfile_suffix))
        return

    url = host_url + url_match.group()
    print('\nURL for snp batch no ', str(outfile_suffix), ':', url)

    #===============
    # writing file: result urls
    #===============
    mcsm_na_temp_dir = output_dir + '/mcsm_na_temp' # creates a temp dir within output_dir
    if not os.path.exists(mcsm_na_temp_dir):
        print('\nCreating mcsm_na_temp in output_dir', output_dir )
        os.makedirs(mcsm_na_temp_dir, exist_ok = True)

    out_url_file = mcsm_na_temp_dir + '/mcsm_na_result_url_' + str(outfile_suffix) + '.txt'
    print('\nWriting output url file:', out_url_file)
    # The file is opened in append mode, so terminate every url with a newline:
    # otherwise repeated submissions with the same suffix end up fused on one line.
    with open(out_url_file, 'a') as url_fh:
        url_fh.write(url + '\n')
#%%#####################################################################

2
mcsm_na/test_snps_b1.csv Normal file
View file

@ -0,0 +1,2 @@
A P3S
A I4N
1 A P3S
2 A I4N