# ----------------------------------------------------------------------
# Recovered from a git patch whose line breaks had been collapsed.
#
#   commit : b69d9d729abee2384a0d43562a3e26a3db4a0faa
#   From   : Tanushree Tunstall
#   Date   : Mon, 15 Feb 2021 12:22:52 +0000
#   Subject: [PATCH] added get_results_mcsm_na.py run_get_results.py to
#            retrieve results for each batch run of 20 for mcsm_na
#
# The patch creates two new files, reproduced below in patch order:
#   mcsm_na/get_results_mcsm_na.py   (new file mode 100644)
#   mcsm_na/run_get_results.py       (new file mode 100755)
# ----------------------------------------------------------------------

# ======================================================================
# File: mcsm_na/get_results_mcsm_na.py
# ======================================================================
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Download mCSM-NA result .txt files for a list of result URLs.

Created on Wed Aug 19 14:33:51 2020

@author: tanu
"""
#%% load packages
import os
import sys
import subprocess
import argparse
import requests
import re
import time
from bs4 import BeautifulSoup
import pandas as pd
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype
#%%#####################################################################

# Trailing "<digits>.<digits>" token of a result URL; it is reused as the
# name of the .txt file on the server and in the local output filename.
_PREDICTION_NUMBER_RE = re.compile(r'([0-9]+\.[0-9]+$)')


def get_results(url_file, host_url, output_dir, outfile_suffix, timeout=60):
    """Download the result .txt file for every URL listed in url_file.

    For each non-blank line of ``url_file`` the trailing
    ``<digits>.<digits>`` token is extracted and the file
    ``<host_url>/mcsm_na/static/results/<token>.txt`` is fetched and
    saved as
    ``<output_dir>/mcsm_na_results/<outfile_suffix>_output_<token>.txt``.

    Parameters
    ----------
    url_file : str
        Path of a text file holding one result URL per line.
    host_url : str
        Base URL of the server (no trailing slash).
    output_dir : str
        Directory in which the ``mcsm_na_results`` sub-directory is
        created (if missing) and the downloads are written.
    outfile_suffix : str
        Label placed at the start of every saved filename.
    timeout : float, optional
        Seconds to wait for the server before ``requests`` raises a
        timeout error instead of blocking indefinitely. Default 60.

    Returns
    -------
    None

    Raises
    ------
    OSError
        If ``url_file`` cannot be read or an output file cannot be written.
    requests.RequestException
        If a download fails at the connection level (including timeout).
    """
    mcsm_na_results_dir = output_dir + '/mcsm_na_results'

    with open(url_file, 'r') as url_fh:
        #============================
        # Output dir for results
        #============================
        # Loop-invariant, so checked once rather than for every URL.
        if not os.path.exists(mcsm_na_results_dir):
            print('\nCreating dir: mcsm_na_results within:', output_dir)
        os.makedirs(mcsm_na_results_dir, exist_ok=True)

        for count, line in enumerate(url_fh):
            line = line.strip()
            if not line:
                # Blank lines carry no URL; nothing to download.
                continue
            print('URL no.', count+1, '\n', line)

            # A line without the expected trailing id cannot be mapped to
            # a results file; skip it instead of failing on `.group()`.
            match = _PREDICTION_NUMBER_RE.search(line)
            if match is None:
                print('\nSkipping line, no prediction no. found:', line)
                continue
            prediction_number = match.group(0)
            print('CHECK prediction no:', prediction_number)

            txt_url = f"{host_url}/mcsm_na/static/results/" + prediction_number + '.txt'
            print('CHECK txt url:', txt_url)

            out_filename = (mcsm_na_results_dir + '/' + outfile_suffix
                            + '_output_' + prediction_number + '.txt')

            #============================
            # Download the .txt
            #============================
            # `.content` yields the body with any transport encoding
            # (gzip/deflate) already decoded and releases the connection;
            # `raw.read()` on a streamed response does neither.
            response_txt = requests.get(txt_url, timeout=timeout)
            if response_txt.status_code != 200:
                print('\nNot downloaded, HTTP status', response_txt.status_code
                      , 'for:', txt_url)
                continue

            print('\nDownloading .txt:', txt_url
                  , '\n\nSaving file as:', out_filename)
            # Distinct handle name: the URL file handle is still being
            # iterated by the enclosing loop.
            with open(out_filename, 'wb') as out_fh:
                out_fh.write(response_txt.content)

#%%#####################################################################

# ======================================================================
# File: mcsm_na/run_get_results.py
# ======================================================================
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 12 12:15:26 2021

@author: tanu
"""
#%% load packages
import os

########################################################################
# variables
homedir = os.path.expanduser('~')
my_host = 'http://biosig.unimelb.edu.au'

# TODO: add cmd line args
#gene = 'gid'
drug = 'streptomycin'
datadir = homedir + '/git/Data'
indir = datadir + '/' + drug + '/input'
outdir = datadir + '/' + drug + '/output'

# batch 1: 00.txt
my_url_file = outdir + '/mcsm_na_temp/mcsm_na_result_url_gid_b1.txt'
my_suffix = 'gid_b1'


def main():
    """Download the mcsm_na results for the batch configured above."""
    # The working directory is switched before the import, in the same
    # order as the original script (presumably so the module resolves
    # from the cwd in interactive sessions -- confirm before removing).
    os.chdir(homedir + '/git/LSHTM_analysis/mcsm_na')
    from get_results_mcsm_na import get_results

    #==========================
    # CALL: get_results()
    # Data: gid+streptomycin
    #==========================
    print('Downloading results for:', my_url_file, '\nsuffix:', my_suffix)

    get_results(url_file = my_url_file
                , host_url = my_host
                , output_dir = outdir
                , outfile_suffix = my_suffix)


if __name__ == '__main__':
    main()
#%%#####################################################################