From a5f187815856a8945084fb068e6482562e1f54dd Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Fri, 12 Feb 2021 15:24:51 +0000 Subject: [PATCH] added tar.gz download within get_results.py --- .../dynamut_results/dynamut_output_multiple.csv | 7 ------- dynamut/examples.py | 17 ++++++++++++++++- dynamut/get_results.py | 16 +++++++++++++++- dynamut/run_results.py | 1 + 4 files changed, 32 insertions(+), 9 deletions(-) delete mode 100644 dynamut/dynamut_results/dynamut_output_multiple.csv diff --git a/dynamut/dynamut_results/dynamut_output_multiple.csv b/dynamut/dynamut_results/dynamut_output_multiple.csv deleted file mode 100644 index fef0be4..0000000 --- a/dynamut/dynamut_results/dynamut_output_multiple.csv +++ /dev/null @@ -1,7 +0,0 @@ -mutationinformation,ddg_dynamut,ddg_encom,ddg_mcsm,ddg_sdm,ddg_duet,dds_encom -G13V,0.006 kcal/mol (Stabilizing),-0.053 kcal/mol (Destabilizing),-0.261 kcal/mol (Destabilizing),-0.120 kcal/mol (Destabilizing),0.120 kcal/mol (Stabilizing),0.066 kcal.mol-1.K-1 (Increase of molecule flexibility) -A19T,-0.077 kcal/mol (Destabilizing),0.224 kcal/mol (Destabilizing),-0.631 kcal/mol (Destabilizing),-2.620 kcal/mol (Destabilizing),-0.758 kcal/mol (Destabilizing),-0.280 kcal.mol-1.K-1 (Decrease of molecule flexibility) -I4N,-0.239 kcal/mol (Destabilizing),-0.720 kcal/mol (Destabilizing),-0.728 kcal/mol (Destabilizing),-0.550 kcal/mol (Destabilizing),-0.461 kcal/mol (Destabilizing),0.900 kcal.mol-1.K-1 (Increase of molecule flexibility) -P3S,0.727 kcal/mol (Stabilizing),0.334 kcal/mol (Destabilizing),-0.672 kcal/mol (Destabilizing),0.010 kcal/mol (Stabilizing),-0.252 kcal/mol (Destabilizing),-0.418 kcal.mol-1.K-1 (Decrease of molecule flexibility) -F12S,-0.270 kcal/mol (Destabilizing),0.048 kcal/mol (Destabilizing),-1.028 kcal/mol (Destabilizing),-0.930 kcal/mol (Destabilizing),-0.993 kcal/mol (Destabilizing),-0.060 kcal.mol-1.K-1 (Decrease of molecule flexibility) -A19V,2.389 kcal/mol (Stabilizing),0.450 kcal/mol (Destabilizing),0.659 kcal/mol (Stabilizing),-0.170 kcal/mol (Destabilizing),1.040 kcal/mol (Stabilizing),-0.562 kcal.mol-1.K-1 (Decrease of molecule flexibility) diff --git a/dynamut/examples.py b/dynamut/examples.py index b8d6c23..1875850 100644 --- a/dynamut/examples.py +++ b/dynamut/examples.py @@ -61,7 +61,7 @@ get_results(url_file = my_url_file_multiple # example 2: single url in a file #---------------------------------------------- my_url_file_single = homedir + '/git/LSHTM_analysis/dynamut/dynamut_temp/dynamut_result_url_batch_single.txt' -print(my_url_file_multiple) +print(my_url_file_single) my_suffix = 'single' get_results(my_url_file_single @@ -69,3 +69,18 @@ get_results(my_url_file_single , output_dir = my_outdir , outfile_suffix = my_suffix) #%%################################################################### +url = 'http://biosig.unimelb.edu.au/dynamut/results_prediction/161296287365' +host_url = 'http://biosig.unimelb.edu.au' +prediction_number = re.search(r'([0-9]+$)', url).group(0) +print(prediction_number) +dynamut_results_dir = '/home/tanu/git/LSHTM_analysis/dynamut/dynamut_results' +tgz_url = f"{host_url}/dynamut/results_file/results_" + prediction_number + '.tar.gz' +print(tgz_url) +outfile_suffix = 'foo' + + +target_path = dynamut_results_dir + '/' + outfile_suffix + '_results_' + prediction_number + '.tar.gz' +response_tgz = requests.get(tgz_url, stream = True) +if response_tgz.status_code == 200: + with open(target_path, 'wb') as f: + f.write(response_tgz.raw.read()) diff --git a/dynamut/get_results.py b/dynamut/get_results.py index 5ae343a..4b0a38b 100644 --- a/dynamut/get_results.py +++ b/dynamut/get_results.py @@ -17,6 +17,7 @@ import pandas as pd from pandas.api.types import is_string_dtype from pandas.api.types import is_numeric_dtype #%%##################################################################### + def get_results(url_file, host_url, output_dir, outfile_suffix): # initilialise empty df dynamut_results_out_df = pd.DataFrame() @@ -27,7 +28,7 @@ def get_results(url_file, host_url, output_dir, outfile_suffix): #batch_response = requests.get(line, headers=headers) batch_response = requests.get(line) batch_soup = BeautifulSoup(batch_response.text, features = 'html.parser') - + # initilialise empty df #dynamut_results_df = pd.DataFrame() for a in batch_soup.find_all('a', href=True, attrs = {'class':'btn btn-default btn-sm'}): @@ -80,4 +81,17 @@ def get_results(url_file, host_url, output_dir, outfile_suffix): # build out filename out_filename = dynamut_results_dir + '/dynamut_output_' + outfile_suffix + '.csv' dynamut_results_out_df.to_csv(out_filename, index = False) + + # Download .tar.gz file + prediction_number = re.search(r'([0-9]+$)', line).group(0) + tgz_url = f"{host_url}/dynamut/results_file/results_" + prediction_number + '.tar.gz' + tgz_filename = dynamut_results_dir + '/' + outfile_suffix + '_results_' + prediction_number + '.tar.gz' + response_tgz = requests.get(tgz_url, stream = True) + if response_tgz.status_code == 200: + print('\nDownloading tar.gz file:', tgz_url + , '\nSaving file as:', tgz_filename) + with open(tgz_filename, 'wb') as f: + f.write(response_tgz.raw.read()) + #%%##################################################################### + diff --git a/dynamut/run_results.py b/dynamut/run_results.py index 429da5d..b6fec62 100644 --- a/dynamut/run_results.py +++ b/dynamut/run_results.py @@ -35,4 +35,5 @@ get_results(url_file = my_url_file , host_url = my_host , output_dir = outdir , outfile_suffix = my_suffix) + ########################################################################