Extracting the single-mutation URL from the batch processing step

Tanushree Tunstall 2021-02-11 17:19:04 +00:00
parent 5d6ddb7639
commit 2e047fd548
2 changed files with 95 additions and 10 deletions


@ -17,13 +17,13 @@ import pandas as pd
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype
#%%============================================================================
#streptomycin/gid_complex.pdb
host = 'http://biosig.unimelb.edu.au/dynamut'
pred_dynamut_batch = '/results_prediction/161287964015'
result_id = re.search( r"([0-9]+)$", pred_dynamut).group(0)
host = 'http://biosig.unimelb.edu.au'
pred_dynamut_batch = '/dynamut/results_prediction/161287964015'
batch_result_url = host + pred_dynamut_batch
batch_result_url
# build a single url with a given mutation
result_id = re.search( r"([0-9]+)$", pred_dynamut_batch).group(0)
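# the DynaMut job id is the trailing run of digits in the batch results path (here 161287964015)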
mut = 'S2C'
single_url = host + '/single_results/' + str(result_id)
single_result_url = host + '/single_results/' + str(result_id) + '/' + mut
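# single_result_url has the form <host>/single_results/<result_id>/<mut>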
@ -58,10 +58,8 @@ if result_response.status_code == 200:
print(results_df)
#%% for loop
single_url = host + '/single_results/' + str(result_id)
#%% looping over mutation
single_url = host + '/single_results/' + str(result_id)
muts = ["S2C", "S2F"]
# initialise empty df
@ -100,4 +98,58 @@ for i, mut in enumerate(muts):
dynamut_results_df = dynamut_results_df.append(results_df)
print(dynamut_results_df)
#%% Derive the single url from the batch result itself
# get request from a batch url
# corresponding to href
batch_result_url
batch_response = requests.get(batch_result_url)
batch_soup = BeautifulSoup(batch_response.text, features = 'html.parser')
print(batch_soup)
#table = batch_soup.find('table', attrs = {'class':'table table-striped table-bordered table-responsive'})
#btn = batch_soup.find_all(href = True, attrs = {'class':'btn btn-default btn-sm'})
#print(btn)
# initialise empty df
dynamut_results_df = pd.DataFrame()
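# each result button on the batch page (anchor with class 'btn btn-default btn-sm') links to one mutation's single-result page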
for a in batch_soup.find_all('a', href=True, attrs = {'class':'btn btn-default btn-sm'}):
print ("Found the URL:", a['href'])
single_result_url = host + a['href']
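# the mutation (e.g. S2C) is the final path component of the single result URL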
snp = re.search(r'([A-Z]+[0-9]+[A-Z]+$)', single_result_url).group(0)
print(snp)
print('\nGetting results from:', single_result_url)
result_response = requests.get(single_result_url)
if result_response.status_code == 200:
print('\nFetching results for SNP:', snp)
# extract results using the html parser
soup = BeautifulSoup(result_response.text, features = 'html.parser')
#web_result_raw = soup.find(id = 'predictions').get_text()
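# scrape the per-predictor values by their element ids: ddG from DynaMut, EnCoM, mCSM, SDM and DUET, plus the EnCoM ddS term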
ddg_dynamut = soup.find(id = 'ddg_dynamut').get_text()
ddg_encom = soup.find(id = 'ddg_encom').get_text()
ddg_mcsm = soup.find(id = 'ddg_mcsm').get_text()
ddg_sdm = soup.find(id = 'ddg_sdm').get_text()
ddg_duet = soup.find(id = 'ddg_duet').get_text()
dds_encom = soup.find(id = 'dds_encom').get_text()
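# gather this SNP's predictions into a single-row dataframe and append it to the batch results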
param_dict = {"mutationinformation" : snp
, "ddg_dynamut" : ddg_dynamut
, "ddg_encom" : ddg_encom
, "ddg_mcsm" : ddg_mcsm
, "ddg_sdm" : ddg_sdm
, "ddg_duet" : ddg_duet
, "dds_encom" : dds_encom
}
results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T
print(results_df)
dynamut_results_df = dynamut_results_df.append(results_df)
print(dynamut_results_df)
print('\nWriting dynamut results df')
dynamut_results_df.to_csv('test_dynamut.csv', index = False)
print('\nResults File:'
, '\nNo. of rows:', dynamut_results_df.shape[0]
, '\nNo. of cols:', dynamut_results_df.shape[1])
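A quick sanity check of the written file (a minimal sketch, assuming the loop above has run and test_dynamut.csv sits in the working directory):

import pandas as pd
# read the scraped results back and confirm one row per mutation link found on the batch page
check_df = pd.read_csv('test_dynamut.csv')
print(check_df.shape)
print(check_df.columns.tolist())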