Handle result URLs that are not ready yet (page still has a meta refresh tag)
Please enter the commit message for your changes. Lines starting
This commit is contained in:
parent
a405aa17c3
commit
b28d866237
2 changed files with 60 additions and 48 deletions
22
mcsm/mcsm.py
22
mcsm/mcsm.py
|
@ -119,16 +119,24 @@ def scrape_results(result_url):
|
||||||
# if results_response is not None:
|
# if results_response is not None:
|
||||||
# page = results_page.text
|
# page = results_page.text
|
||||||
if result_response.status_code == 200:
|
if result_response.status_code == 200:
|
||||||
print('SUCCESS: Fetching results')
|
print('Fetching results')
|
||||||
|
# extract results using the html parser
|
||||||
|
soup = BeautifulSoup(result_response.text, features = 'html.parser')
|
||||||
|
# print(soup)
|
||||||
|
web_result_raw = soup.find(class_ = 'span4').get_text()
|
||||||
|
#metatags = soup.find_all('meta')
|
||||||
|
metatags = soup.find_all('meta', attrs={'http-equiv':'refresh'})
|
||||||
|
#print('meta tags:', metatags)
|
||||||
|
if metatags:
|
||||||
|
print('WARNING: Submission not ready for URL:', result_url)
|
||||||
|
# TODO: Add logging
|
||||||
|
#if debug:
|
||||||
|
# debug.warning('submission not ready for URL:', result_url)
|
||||||
|
else:
|
||||||
|
return web_result_raw
|
||||||
else:
|
else:
|
||||||
print('FAIL: Could not fetch results'
|
print('FAIL: Could not fetch results'
|
||||||
, '\nCheck if url is valid')
|
, '\nCheck if url is valid')
|
||||||
# extract results using the html parser
|
|
||||||
soup = BeautifulSoup(result_response.text, features = 'html.parser')
|
|
||||||
# print(soup)
|
|
||||||
web_result_raw = soup.find(class_ = 'span4').get_text()
|
|
||||||
|
|
||||||
return web_result_raw
|
|
||||||
|
|
||||||
|
|
||||||
def build_result_dict(web_result_raw):
|
def build_result_dict(web_result_raw):
|
||||||
|
|
|
@ -62,53 +62,57 @@ out_filename_format = gene.lower() + '_mcsm_processed.csv'
|
||||||
outfile_format = outdir + '/' + out_filename_format
|
outfile_format = outdir + '/' + out_filename_format
|
||||||
#%%=====================================================================
|
#%%=====================================================================
|
||||||
def submit_mcsm():
|
def submit_mcsm():
|
||||||
my_chain = 'A'
|
my_chain = 'A'
|
||||||
# my_ligand_id = 'DCS' # FIXME
|
# my_ligand_id = 'DCS' # FIXME
|
||||||
my_ligand_id = 'RMP' # FIXME
|
my_ligand_id = 'RMP' # FIXME
|
||||||
my_affinity = 10
|
my_affinity = 10
|
||||||
|
|
||||||
print('Result urls and error file (if any) will be written in: ', outdir)
|
print('Result urls and error file (if any) will be written in: ', outdir)
|
||||||
|
|
||||||
# call function to format data to remove duplicate snps before submitting job
|
# call function to format data to remove duplicate snps before submitting job
|
||||||
mcsm_muts = format_data(infile_snps)
|
mcsm_muts = format_data(infile_snps)
|
||||||
mut_count = 1 # 1-based counter used in the progress message
|
mut_count = 1 # 1-based counter used in the progress message
|
||||||
infile_snps_len = os.popen('wc -l < %s' % infile_snps).read() # quicker than using Python :-)
|
infile_snps_len = os.popen('wc -l < %s' % infile_snps).read() # quicker than using Python :-)
|
||||||
print('Total SNPs for', gene, ':', infile_snps_len)
|
print('Total SNPs for', gene, ':', infile_snps_len)
|
||||||
for mcsm_mut in mcsm_muts:
|
for mcsm_mut in mcsm_muts:
|
||||||
print('Processing mutation: %s of %s' % (mut_count, infile_snps_len), mcsm_mut)
|
print('Processing mutation: %s of %s' % (mut_count, infile_snps_len), mcsm_mut)
|
||||||
print('Parameters for mcsm_lig:', in_filename_pdb, mcsm_mut, my_chain, my_ligand_id, my_affinity, prediction_url, outdir, gene)
|
print('Parameters for mcsm_lig:', in_filename_pdb, mcsm_mut, my_chain, my_ligand_id, my_affinity, prediction_url, outdir, gene)
|
||||||
# function call: to request mcsm prediction
|
# function call: to request mcsm prediction
|
||||||
# which writes file containing url for valid submissions and invalid muts to respective files
|
# which writes file containing url for valid submissions and invalid muts to respective files
|
||||||
holding_page = request_calculation(infile_pdb, mcsm_mut, my_chain, my_ligand_id, my_affinity, prediction_url, outdir, gene, host)
|
holding_page = request_calculation(infile_pdb, mcsm_mut, my_chain, my_ligand_id, my_affinity, prediction_url, outdir, gene, host)
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
mut_count += 1
|
mut_count += 1
|
||||||
# result_url = write_result_url(holding_page, result_urls, host)
|
# result_url = write_result_url(holding_page, result_urls, host)
|
||||||
|
|
||||||
print('Request submitted'
|
print('Request submitted'
|
||||||
, '\nCAUTION: Processing will take at least ten'
|
, '\nCAUTION: Processing will take at least ten'
|
||||||
, 'minutes, but will be longer for more mutations.')
|
, 'minutes, but will be longer for more mutations.')
|
||||||
#%%=====================================================================
|
#%%=====================================================================
|
||||||
def get_results():
|
def get_results():
|
||||||
|
|
||||||
output_df = pd.DataFrame()
|
output_df = pd.DataFrame()
|
||||||
url_counter = 1 # 1-based counter used in the progress message
|
url_counter = 1 # 1-based counter used in the progress message
|
||||||
infile_len = os.popen('wc -l < %s' % result_urls).read() # quicker than using Python :-) #FIXME filename (infile_urls)
|
success_counter = 1
|
||||||
|
infile_len = os.popen('wc -l < %s' % result_urls).read() # quicker than using Python :-) #FIXME filename (infile_urls)
|
||||||
|
|
||||||
print('Total URLs:', infile_len)
|
print('Total URLs:', infile_len)
|
||||||
|
|
||||||
with open(result_urls, 'r') as urlfile:
|
with open(result_urls, 'r') as urlfile:
|
||||||
for line in urlfile:
|
for line in urlfile:
|
||||||
url_line = line.strip()
|
url_line = line.strip()
|
||||||
# call functions
|
# call functions
|
||||||
results_interim = scrape_results(url_line)
|
results_interim = scrape_results(url_line)
|
||||||
result_dict = build_result_dict(results_interim)
|
if results_interim is not None:
|
||||||
|
print('Processing URL: %s of %s' % (url_counter, infile_len))
|
||||||
print('Processing URL: %s of %s' % (url_counter, infile_len))
|
result_dict = build_result_dict(results_interim)
|
||||||
df = pd.DataFrame(result_dict, index=[url_counter])
|
df = pd.DataFrame(result_dict, index=[url_counter])
|
||||||
url_counter += 1
|
output_df = output_df.append(df)
|
||||||
output_df = output_df.append(df)
|
success_counter += 1
|
||||||
|
url_counter += 1
|
||||||
output_df.to_csv(mcsm_output, index = None, header = True)
|
|
||||||
|
print('Total URLs: %s Successful: %s Failed: %s' % (url_counter-1, success_counter-1, (url_counter - success_counter)))
|
||||||
|
|
||||||
|
output_df.to_csv(mcsm_output, index = None, header = True)
|
||||||
#%%=====================================================================
|
#%%=====================================================================
|
||||||
def format_results():
|
def format_results():
|
||||||
print('Input file:', mcsm_output
|
print('Input file:', mcsm_output
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue