adding separate script for getting results for mcsm

Tanushree Tunstall 2020-04-09 15:42:56 +01:00
parent 41f118223c
commit e4df1c5095
2 changed files with 216 additions and 40 deletions


@@ -17,7 +17,7 @@ from csv import reader
homedir = os.path.expanduser('~')
# set working dir
os.getcwd()
os.chdir(homedir + '/git/LSHTM_analysis/meta_data_analysis')
os.chdir(homedir + '/git/LSHTM_analysis/mcsm')
os.getcwd()
#=======================================================================
#%% command line args
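The hunk above only switches the working directory from meta_data_analysis to the new mcsm folder. A minimal sketch of the same step with pathlib, assuming the ~/git/LSHTM_analysis checkout used throughout this script (not part of the commit):

from pathlib import Path
import os

# assumed repo layout; adjust if the checkout lives elsewhere
mcsm_dir = Path.home() / 'git' / 'LSHTM_analysis' / 'mcsm'
os.chdir(mcsm_dir)
print(os.getcwd())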
@@ -74,24 +74,27 @@ print('Output filename:', out_filename
, '\n=============================================================')
#%% global variables
HOST = "http://biosig.unimelb.edu.au"
PREDICTION_URL = f"{HOST}/mcsm_lig/prediction"
host = "http://biosig.unimelb.edu.au"
prediction_url = f"{host}/mcsm_lig/prediction"
#=======================================================================
#%%
def format_data(data_file):
"""
Read file containing SNPs to submit for mcsm analysis and save unique entries
Read file containing SNPs for mcsm analysis. This is mainly a
sanity check; the assumption is that the input file has no duplicates.
#FIXME: perhaps check for duplicates and write/pass a deduplicated file
Parameters
----------
@param data_file: csv file containing nsSNPs for given drug and gene.
csv file format
===============
@param data_file: csv file containing nsSNPs for a given drug and gene.
csv file format:
single column with no header, with nsSNPs formatted as below:
A1B
B2C
@type inputtsv: string
@type data_file: string
Returns
----------
@return unique SNPs (after removing duplicates)
"""
data = pd.read_csv(data_file, header = None)
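The #FIXME in the new format_data docstring mentions a duplicate check that is not implemented yet. A minimal sketch of what that check could look like with pandas, assuming the same single-column, headerless csv of nsSNPs; the function name is illustrative and not part of the commit:

import pandas as pd

def format_data_dedup(data_file):
    # read the single-column csv of nsSNPs (e.g. A1B), no header
    data = pd.read_csv(data_file, header=None, names=['mutation'])
    n_total = len(data)
    data_unique = data.drop_duplicates()
    if len(data_unique) != n_total:
        print('WARNING: dropped %d duplicate entries' % (n_total - len(data_unique)))
    return data_unique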
@@ -99,20 +102,33 @@ def format_data(data_file):
# print(data.head())
return data
def request_calculation(pdb_path, mutation, chain, ligand_id, affinity):
def request_calculation(pdb_file, mutation, chain, ligand_id, affinity):
"""
Makes a POST request for a ligand affinity prediction.
pdb_path (FIXME: variable name): valid path to pdb structure
mutation: single mutation of the format: {WT}<POS>{Mut}
chain: single-letter(caps)
wt affinity: in nM
lig_id = 3-letter code (should match pdb file)
Parameters
----------
@param pdb_file: valid path to pdb structure
@type string
@param mutation: single mutation of the format: {WT}<POS>{Mut}
@type string
@param chain: single-letter chain ID (uppercase)
@type chr
@param affinity: wild-type affinity in nM
@type number
@param ligand_id: 3-letter ligand code (should match the pdb file)
@type string
Returns
----------
@return response object
@type object
"""
with open(pdb_path, "rb") as pdb_file:
with open(pdb_file, "rb") as pdb_file:
files = {"wild": pdb_file}
body = {
"mutation": mutation,
@@ -121,30 +137,40 @@ def request_calculation(pdb_path, mutation, chain, ligand_id, affinity):
"affin_wt": affinity
}
response = requests.post(PREDICTION_URL, files = files, data = body)
response = requests.post(prediction_url, files = files, data = body)
response.raise_for_status()
return response
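For orientation, a hedged usage sketch of request_calculation as defined above; the pdb filename, mutation, chain, ligand code and affinity below are placeholders, not values taken from this repository:

# hypothetical call with placeholder arguments
holding_page = request_calculation('example.pdb', 'S450L', 'A', 'RFP', 10)
print(holding_page.status_code)  # 200 if the submission was accepted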
def find_results_url(holding_page, out_result_urls):
"""Extract the results url from the holding page returned after
def write_result_url(holding_page, out_result_url):
"""
Extract and write results url from the holding page returned after
requesting a calculation.
holding_page: response object returned from requesting a calculation.
returns: full url to the results page
Parameters
----------
@param holding_page: response object containing html content
@type requests.Response
Returns
----------
@return None; appends the result url to a file (one url per processed mutation)
"""
url_match = re.search('/mcsm_lig/results_prediction/.+(?=")', holding_page.text)
url = HOST + url_match.group()
url = host + url_match.group()
#===============
# writing file
#===============
# myfile = open('/tmp/result_urls', 'a')
myfile = open(out_result_urls, 'a')
myfile = open(out_result_url, 'a')
myfile.write(url+'\n')
myfile.close()
print(myfile)
# return url
#=======================================================================
#%% call functions
mcsm_muts = format_data(infile_snps)
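write_result_url only records the prediction URLs; fetching the actual mcsm output is left to the separate results script named in the commit message. A rough sketch of how the file of URLs might be consumed later, assuming one URL per line; the filename is a placeholder:

import requests

# 'result_urls.txt' stands in for the file written by write_result_url
with open('result_urls.txt') as f:
    for url in f:
        url = url.strip()
        if not url:
            continue
        page = requests.get(url)
        page.raise_for_status()
        # the results page is html; parsing the predicted affinity change is out of scope here
        print(url, len(page.text), 'characters of html')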
@@ -166,21 +192,24 @@ my_affinity = 10
print('Result urls will be written to:', out_filename
, '\nPath:', outdir)
count=0
fh = open(infile_snps,'r')
file_len = os.system("wc -l %s" % infile_snps) # handy way of counting no.of entries getting processed
for mcsm_mut in fh:
mcsm_mut = mcsm_mut.rstrip()
print('Parameters for mcsm_lig:', in_filename_pdb, mcsm_mut, my_chain, my_ligand_id, my_affinity)
print('Processing mcsm mut:', mcsm_mut)
holding_page = request_calculation(pdb_file, mcsm_mut, my_chain, my_ligand_id, my_affinity)
time.sleep(1)
results_url = find_results_url(holding_page, outfile)
# print(mcsm_mut, holding_page)
count += 1
print('getting result url:'
, results_url
, count, 'of', file_len)
mut_count = 1 # counter starts at 1 for human-readable progress output
infile_snps_len = os.popen('wc -l < %s' % infile_snps).read().strip() # shell wc is quicker than counting in Python; note: returns a string
print('Total SNPs for', gene, ':', infile_snps_len)
with open(infile_snps,'r') as fh:
for mcsm_mut in fh:
mcsm_mut = mcsm_mut.rstrip()
print('Processing mcsm mut:', mcsm_mut)
print('Parameters for mcsm_lig:', in_filename_pdb, mcsm_mut, my_chain, my_ligand_id, my_affinity)
holding_page = request_calculation(pdb_file, mcsm_mut, my_chain, my_ligand_id, my_affinity)
time.sleep(1)
print('Processing mutation: %s of %s' % (mut_count, infile_snps_len))
mut_count += 1
write_result_url(holding_page, outfile) # appends the result url for this mutation to outfile
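Since the loop above posts one request per SNP to a remote server, a failed submission will currently abort the run at raise_for_status. A minimal retry wrapper is sketched below; the retry policy is an assumption, not part of the commit:

import time
import requests

def request_with_retry(pdb_file, mcsm_mut, chain, ligand_id, affinity, retries=3):
    # thin wrapper around request_calculation; waits and retries on request errors
    for attempt in range(1, retries + 1):
        try:
            return request_calculation(pdb_file, mcsm_mut, chain, ligand_id, affinity)
        except requests.exceptions.RequestException as e:
            print('Attempt %d of %d failed: %s' % (attempt, retries, e))
            time.sleep(5 * attempt)
    raise RuntimeError('mcsm submission failed after %d attempts' % retries)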