added get_results_mcsm_na.py run_get_results.py to retrieve results for each batch run of 20 for mcsm_na

2021-02-15 12:22:52 +00:00 · 2021-02-15 12:22:52 +00:00 · b69d9d729a
commit b69d9d729a
parent 7a74fecbda
2 changed files with 90 additions and 0 deletions
--- a/mcsm_na/get_results_mcsm_na.py
+++ b/mcsm_na/get_results_mcsm_na.py
@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Aug 19 14:33:51 2020
+
+@author: tanu
+"""
+#%% load packages
+import os,sys
+import subprocess
+import argparse
+import requests
+import re
+import time
+from bs4 import BeautifulSoup
+import pandas as pd
+from pandas.api.types import is_string_dtype
+from pandas.api.types import is_numeric_dtype
+#%%#####################################################################
+
+def get_results(url_file, host_url, output_dir, outfile_suffix):
+    # initilialise empty df
+    #mcsm_na_results_out_df = pd.DataFrame()
+    with open(url_file, 'r') as f:
+        for count, line in enumerate(f):
+            line = line.strip()
+            print('URL no.', count+1, '\n', line)
+            
+            #============================
+            # Writing results file: csv
+            #============================                   
+            mcsm_na_results_dir = output_dir + '/mcsm_na_results'
+            if not os.path.exists(mcsm_na_results_dir):
+                print('\nCreating dir: mcsm_na_results within:', output_dir )
+                os.makedirs(mcsm_na_results_dir)   
+                          
+            # Download the .txt
+            prediction_number = re.search(r'([0-9]+\.[0-9]+$)', line).group(0)
+            print('CHECK prediction no:', prediction_number)
+            txt_url = f"{host_url}/mcsm_na/static/results/" + prediction_number + '.txt'
+            print('CHECK txt url:', txt_url)
+            
+            out_filename = mcsm_na_results_dir + '/' + outfile_suffix + '_output_' + prediction_number + '.txt'
+            response_txt = requests.get(txt_url, stream = True)
+            if response_txt.status_code == 200:
+                print('\nDownloading .txt:', txt_url
+                      , '\n\nSaving file as:', out_filename)
+                with open(out_filename, 'wb') as f:
+                    f.write(response_txt.raw.read())
+   
+#%%##################################################################### 
+
--- a/mcsm_na/run_get_results.py
+++ b/mcsm_na/run_get_results.py
@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Feb 12 12:15:26 2021
+
+@author: tanu
+"""
+#%% load packages
+import os
+homedir = os.path.expanduser('~')
+os.chdir (homedir + '/git/LSHTM_analysis/mcsm_na')
+from get_results_mcsm_na import *
+########################################################################
+# variables
+my_host = 'http://biosig.unimelb.edu.au'
+
+# TODO: add cmd line args
+#gene = 'gid'
+drug = 'streptomycin'
+datadir = homedir + '/git/Data'
+indir = datadir + '/' + drug + '/input'
+outdir = datadir + '/' + drug + '/output'
+
+# batch 1: 00.txt
+my_url_file =  outdir + '/mcsm_na_temp/mcsm_na_result_url_gid_b1.txt'
+my_suffix = 'gid_b1'
+
+#==========================
+# CALL: get_results() 
+# Data: gid+streptomycin
+#==========================
+print('Downloading results for:', my_url_file, '\nsuffix:', my_suffix)
+
+get_results(url_file  = my_url_file
+           , host_url = my_host
+           , output_dir = outdir
+           , outfile_suffix = my_suffix)
+#%%#####################################################################