added tar.gz download within get_results.py
This commit is contained in:
parent
80f7e039ab
commit
56f5479c0b
4 changed files with 32 additions and 9 deletions
|
@ -17,6 +17,7 @@ import pandas as pd
|
|||
from pandas.api.types import is_string_dtype
|
||||
from pandas.api.types import is_numeric_dtype
|
||||
#%%#####################################################################
|
||||
|
||||
def get_results(url_file, host_url, output_dir, outfile_suffix):
|
||||
# initialise empty df
|
||||
dynamut_results_out_df = pd.DataFrame()
|
||||
|
@ -27,7 +28,7 @@ def get_results(url_file, host_url, output_dir, outfile_suffix):
|
|||
#batch_response = requests.get(line, headers=headers)
|
||||
batch_response = requests.get(line)
|
||||
batch_soup = BeautifulSoup(batch_response.text, features = 'html.parser')
|
||||
|
||||
|
||||
# initialise empty df
|
||||
#dynamut_results_df = pd.DataFrame()
|
||||
for a in batch_soup.find_all('a', href=True, attrs = {'class':'btn btn-default btn-sm'}):
|
||||
|
@ -80,4 +81,17 @@ def get_results(url_file, host_url, output_dir, outfile_suffix):
|
|||
# build out filename
|
||||
out_filename = dynamut_results_dir + '/dynamut_output_' + outfile_suffix + '.csv'
|
||||
dynamut_results_out_df.to_csv(out_filename, index = False)
|
||||
|
||||
# Download .tar.gz file
|
||||
prediction_number = re.search(r'([0-9]+$)', line).group(0)
|
||||
tgz_url = f"{host_url}/dynamut/results_file/results_" + prediction_number + '.tar.gz'
|
||||
tgz_filename = dynamut_results_dir + '/' + outfile_suffix + '_results_' + prediction_number + '.tar.gz'
|
||||
response_tgz = requests.get(tgz_url, stream = True)
|
||||
if response_tgz.status_code == 200:
|
||||
print('\nDownloading tar.gz file:', tgz_url
|
||||
, '\nSaving file as:', tgz_filename)
|
||||
with open(tgz_filename, 'wb') as f:
|
||||
f.write(response_tgz.raw.read())
|
||||
|
||||
#%%#####################################################################
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue