adding separate script for getting results for mcsm
This commit is contained in:
parent
41f118223c
commit
e4df1c5095
2 changed files with 216 additions and 40 deletions
109
mcsm/run_mcsm.py
109
mcsm/run_mcsm.py
|
@ -17,7 +17,7 @@ from csv import reader
|
|||
homedir = os.path.expanduser('~')
|
||||
# set working dir
|
||||
os.getcwd()
|
||||
os.chdir(homedir + '/git/LSHTM_analysis/meta_data_analysis')
|
||||
os.chdir(homedir + '/git/LSHTM_analysis/mcsm')
|
||||
os.getcwd()
|
||||
#=======================================================================
|
||||
#%% command line args
|
||||
|
@ -74,24 +74,27 @@ print('Output filename:', out_filename
|
|||
, '\n=============================================================')
|
||||
|
||||
#%% global variables
|
||||
HOST = "http://biosig.unimelb.edu.au"
|
||||
PREDICTION_URL = f"{HOST}/mcsm_lig/prediction"
|
||||
host = "http://biosig.unimelb.edu.au"
|
||||
prediction_url = f"{host}/mcsm_lig/prediction"
|
||||
#=======================================================================
|
||||
#%%
|
||||
def format_data(data_file):
|
||||
"""
|
||||
Read file containing SNPs to submit for mcsm analysis and save unique entries
|
||||
Read file containing SNPs for mcsm analysis. This is mainly for
|
||||
sanity check. Assumption is that the input file will have no duplicates.
|
||||
#FIXME: perhaps, check if duplicates and write file/pass file
|
||||
|
||||
Parameters
|
||||
----------
|
||||
@param data_file: csv file containing nsSNPs for given drug and gene.
|
||||
csv file format
|
||||
===============
|
||||
@param data_file csv file containing nsSNPs for given drug and gene.
|
||||
csv file format:
|
||||
single column with no headers with nsSNP format as below:
|
||||
A1B
|
||||
B2C
|
||||
@type inputtsv: string
|
||||
|
||||
@type data_file: string
|
||||
|
||||
Returns
|
||||
----------
|
||||
@return unique SNPs (after removing duplicates)
|
||||
"""
|
||||
data = pd.read_csv(data_file, header = None)
|
||||
|
@ -99,20 +102,33 @@ def format_data(data_file):
|
|||
# print(data.head())
|
||||
return data
|
||||
|
||||
def request_calculation(pdb_path, mutation, chain, ligand_id, affinity):
|
||||
def request_calculation(pdb_file, mutation, chain, ligand_id, affinity):
|
||||
"""
|
||||
Makes a POST request for a ligand affinity prediction.
|
||||
|
||||
pdb_path (FIXME: variable name): valid path to pdb structure
|
||||
mutation: single mutation of the format: {WT}<POS>{Mut}
|
||||
chain: single-letter(caps)
|
||||
wt affinity: in nM
|
||||
lig_id = 3-letter code (should match pdb file)
|
||||
Parameters
|
||||
----------
|
||||
@param pdb_file: valid path to pdb structure
|
||||
@type string
|
||||
|
||||
@return a response object
|
||||
@type response object
|
||||
@param mutation: single mutation of the format: {WT}<POS>{Mut}
|
||||
@type string
|
||||
|
||||
@param chain: single-letter(caps)
|
||||
@type chr
|
||||
|
||||
@param wt affinity: in nM
|
||||
@type number
|
||||
|
||||
@param lig_id: 3-letter code (should match pdb file)
|
||||
@type string
|
||||
|
||||
Returns
|
||||
----------
|
||||
@return response object
|
||||
@type object
|
||||
"""
|
||||
with open(pdb_path, "rb") as pdb_file:
|
||||
with open(pdb_file, "rb") as pdb_file:
|
||||
files = {"wild": pdb_file}
|
||||
body = {
|
||||
"mutation": mutation,
|
||||
|
@ -121,30 +137,40 @@ def request_calculation(pdb_path, mutation, chain, ligand_id, affinity):
|
|||
"affin_wt": affinity
|
||||
}
|
||||
|
||||
response = requests.post(PREDICTION_URL, files = files, data = body)
|
||||
response = requests.post(prediction_url, files = files, data = body)
|
||||
response.raise_for_status()
|
||||
|
||||
return response
|
||||
|
||||
def find_results_url(holding_page, out_result_urls):
|
||||
"""Extract the results url from the holding page returned after
|
||||
def write_result_url(holding_page, out_result_url):
|
||||
"""
|
||||
Extract and write results url from the holding page returned after
|
||||
requesting a calculation.
|
||||
|
||||
holding_page: response object returned from requesting a calculation.
|
||||
returns: full url to the results page
|
||||
Parameters
|
||||
----------
|
||||
@param holding_page: response object containinig html content
|
||||
@type FIXME text
|
||||
|
||||
Returns
|
||||
----------
|
||||
@return None, writes a file containing result urls (= total no. of muts)
|
||||
"""
|
||||
url_match = re.search('/mcsm_lig/results_prediction/.+(?=")', holding_page.text)
|
||||
url = HOST + url_match.group()
|
||||
url = host + url_match.group()
|
||||
|
||||
#===============
|
||||
# writing file
|
||||
#===============
|
||||
# myfile = open('/tmp/result_urls', 'a')
|
||||
myfile = open(out_result_urls, 'a')
|
||||
myfile = open(out_result_url, 'a')
|
||||
myfile.write(url+'\n')
|
||||
myfile.close()
|
||||
print(myfile)
|
||||
# return url
|
||||
|
||||
|
||||
|
||||
#=======================================================================
|
||||
#%% call functions
|
||||
mcsm_muts = format_data(infile_snps)
|
||||
|
@ -166,21 +192,24 @@ my_affinity = 10
|
|||
print('Result urls will be written in:', out_filename
|
||||
, '\nPath:', outdir)
|
||||
|
||||
count=0
|
||||
fh = open(infile_snps,'r')
|
||||
file_len = os.system("wc -l %s" % infile_snps) # handy way of counting no.of entries getting processed
|
||||
for mcsm_mut in fh:
|
||||
mcsm_mut = mcsm_mut.rstrip()
|
||||
print('Parameters for mcsm_lig:', in_filename_pdb, mcsm_mut, my_chain, my_ligand_id, my_affinity)
|
||||
print('Processing mcsm mut:', mcsm_mut)
|
||||
holding_page = request_calculation(pdb_file, mcsm_mut, my_chain, my_ligand_id, my_affinity)
|
||||
time.sleep(1)
|
||||
results_url = find_results_url(holding_page, outfile)
|
||||
# print(mcsm_mut, holding_page)
|
||||
count += 1
|
||||
print('getting result url:'
|
||||
, results_url
|
||||
, count, 'of', file_len)
|
||||
mut_count = 1 # HURR DURR COUNT STARTEDS AT ONE1`!1
|
||||
infile_snps_len = os.popen('wc -l < %s' % infile_snps).read() # quicker than using Python :-)
|
||||
print('Total SNPs for', gene, ':', infile_snps_len)
|
||||
|
||||
with open(infile_snps,'r') as fh:
|
||||
for mcsm_mut in fh:
|
||||
mcsm_mut = mcsm_mut.rstrip()
|
||||
print('Processing mcsm mut:', mcsm_mut)
|
||||
print('Parameters for mcsm_lig:', in_filename_pdb, mcsm_mut, my_chain, my_ligand_id, my_affinity)
|
||||
|
||||
holding_page = request_calculation(pdb_file, mcsm_mut, my_chain, my_ligand_id, my_affinity)
|
||||
time.sleep(1)
|
||||
print('Processing mutation: %s of %s' % (mut_count, infile_snps_len))
|
||||
mut_count += 1
|
||||
result_url = write_result_url(holding_page, outfile)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue