added tar.gz download within get_results.py

This commit is contained in:
Tanushree Tunstall 2021-02-12 15:24:51 +00:00
parent 80f7e039ab
commit 56f5479c0b
4 changed files with 32 additions and 9 deletions

View file

@ -17,6 +17,7 @@ import pandas as pd
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype
#%%#####################################################################
def get_results(url_file, host_url, output_dir, outfile_suffix):
# initialise empty df
dynamut_results_out_df = pd.DataFrame()
@ -27,7 +28,7 @@ def get_results(url_file, host_url, output_dir, outfile_suffix):
#batch_response = requests.get(line, headers=headers)
batch_response = requests.get(line)
batch_soup = BeautifulSoup(batch_response.text, features = 'html.parser')
# initialise empty df
#dynamut_results_df = pd.DataFrame()
for a in batch_soup.find_all('a', href=True, attrs = {'class':'btn btn-default btn-sm'}):
@ -80,4 +81,17 @@ def get_results(url_file, host_url, output_dir, outfile_suffix):
# build out filename
out_filename = dynamut_results_dir + '/dynamut_output_' + outfile_suffix + '.csv'
dynamut_results_out_df.to_csv(out_filename, index = False)
# Download .tar.gz file
prediction_number = re.search(r'([0-9]+$)', line).group(0)
tgz_url = f"{host_url}/dynamut/results_file/results_" + prediction_number + '.tar.gz'
tgz_filename = dynamut_results_dir + '/' + outfile_suffix + '_results_' + prediction_number + '.tar.gz'
response_tgz = requests.get(tgz_url, stream = True)
if response_tgz.status_code == 200:
print('\nDownloading tar.gz file:', tgz_url
, '\nSaving file as:', tgz_filename)
with open(tgz_filename, 'wb') as f:
f.write(response_tgz.raw.read())
#%%#####################################################################