Handle result URLs that are not ready yet (page still contains a meta refresh tag)

Please enter the commit message for your changes. Lines starting with '#' will be ignored, and an empty message aborts the commit.
This commit is contained in:
Tanushree Tunstall 2020-04-21 17:12:18 +01:00
parent 8b1a7fc71c
commit 1d84846789
2 changed files with 60 additions and 48 deletions

View file

@ -119,16 +119,24 @@ def scrape_results(result_url):
# if results_response is not None:
# page = results_page.text
if result_response.status_code == 200:
print('SUCCESS: Fetching results')
print('Fetching results')
# extract results using the html parser
soup = BeautifulSoup(result_response.text, features = 'html.parser')
# print(soup)
web_result_raw = soup.find(class_ = 'span4').get_text()
#metatags = soup.find_all('meta')
metatags = soup.find_all('meta', attrs={'http-equiv':'refresh'})
#print('meta tags:', metatags)
if metatags:
print('WARNING: Submission not ready for URL:', result_url)
# TODO: Add logging
#if debug:
# debug.warning('submission not ready for URL:', result_url)
else:
return web_result_raw
else:
print('FAIL: Could not fetch results'
, '\nCheck if url is valid')
# extract results using the html parser
soup = BeautifulSoup(result_response.text, features = 'html.parser')
# print(soup)
web_result_raw = soup.find(class_ = 'span4').get_text()
return web_result_raw
def build_result_dict(web_result_raw):