separated defs and calls and added a separate script to test examples
parent 6c458f8883
commit deb0aa8e58
13 changed files with 281 additions and 517 deletions
dynamut/get_results.py (200 changes, Executable file → Normal file)
@@ -16,140 +16,68 @@ from bs4 import BeautifulSoup
import pandas as pd
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype
#%%============================================================================
host = 'http://biosig.unimelb.edu.au'
pred_dynamut_batch = '/dynamut/results_prediction/161287964015'
batch_result_url = host + pred_dynamut_batch
batch_result_url

# build a single url with a given mutation
result_id = re.search(r"([0-9]+)$", pred_dynamut_batch).group(0)
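# the regex captures the trailing digits of the batch path as the job id
# (here '161287964015')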
mut = 'S2C'
single_url = host + '/single_results/' + str(result_id)
single_result_url = host + '/single_results/' + str(result_id) + '/' + mut
print(single_result_url)

#%%============================================================================
param_dict = {}

result_response = requests.get(single_result_url)
if result_response.status_code == 200:
    print('Fetching results')
    # extract results using the html parser
    soup = BeautifulSoup(result_response.text, features = 'html.parser')
    #web_result_raw = soup.find(id = 'predictions').get_text()
    ddg_dynamut = soup.find(id = 'ddg_dynamut').get_text()
    ddg_encom = soup.find(id = 'ddg_encom').get_text()
    ddg_mcsm = soup.find(id = 'ddg_mcsm').get_text()
    ddg_sdm = soup.find(id = 'ddg_sdm').get_text()
    ddg_duet = soup.find(id = 'ddg_duet').get_text()
    dds_encom = soup.find(id = 'dds_encom').get_text()
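    # these element ids appear to map to the predictors DynaMut reports:
    # DynaMut, EnCoM, mCSM, SDM and DUET ΔΔG values, plus the EnCoM ΔΔS
    # (vibrational entropy) term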

    param_dict = {"mutationinformation" : mut
                  , "ddg_dynamut" : ddg_dynamut
                  , "ddg_encom" : ddg_encom
                  , "ddg_mcsm" : ddg_mcsm
                  , "ddg_sdm" : ddg_sdm
                  , "ddg_duet" : ddg_duet
                  , "dds_encom" : dds_encom
                  }
    results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T
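    # from_dict(..., orient = "index").T gives a one-row frame with one
    # column per key, convenient for stacking per-mutation rows later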

    print(results_df)

#%% looping over mutation
single_url = host + '/single_results/' + str(result_id)
muts = ["S2C", "S2F"]
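# mutation strings use <wild-type aa><position><mutant aa> one-letter
# notation, e.g. 'S2C' = serine at position 2 mutated to cysteine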

# initialise empty df
dynamut_results_df = pd.DataFrame()

for i, mut in enumerate(muts):
    #param_dict = {}
    print('Running mutation', i+1, ':', mut)
    snp = mut
    single_result_url = single_url + '/' + snp
    print('Getting results from:', single_result_url)

#%%#####################################################################
def get_results(url_file, host_url, output_dir, outfile_suffix):
    # initialise empty df
    dynamut_results_out_df = pd.DataFrame()
    with open(url_file, 'r') as f:
        for count, line in enumerate(f):
            line = line.strip()
            print('URL no.', count+1, '\n', line)
            #batch_response = requests.get(line, headers=headers)
            batch_response = requests.get(line)
            batch_soup = BeautifulSoup(batch_response.text, features = 'html.parser')

            # initialise empty df
            #dynamut_results_df = pd.DataFrame()
            for a in batch_soup.find_all('a', href=True, attrs = {'class':'btn btn-default btn-sm'}):
                print("Found the URL:", a['href'])
                single_result_url = host_url + a['href']
                snp = re.search(r'([A-Z]+[0-9]+[A-Z]+$)', single_result_url).group(0)
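                # the trailing letters-digits-letters token of the href is
                # the mutation string itself (e.g. 'S2C')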
                print(snp)
                print('\nGetting results from:', single_result_url)

                result_response = requests.get(single_result_url)
                if result_response.status_code == 200:
                    print('\nFetching results for SNP:', snp)
                    # extract results using the html parser
                    soup = BeautifulSoup(result_response.text, features = 'html.parser')
                    #web_result_raw = soup.find(id = 'predictions').get_text()
                    ddg_dynamut = soup.find(id = 'ddg_dynamut').get_text()
                    ddg_encom = soup.find(id = 'ddg_encom').get_text()
                    ddg_mcsm = soup.find(id = 'ddg_mcsm').get_text()
                    ddg_sdm = soup.find(id = 'ddg_sdm').get_text()
                    ddg_duet = soup.find(id = 'ddg_duet').get_text()
                    dds_encom = soup.find(id = 'dds_encom').get_text()

                    param_dict = {"mutationinformation" : snp
                                  , "ddg_dynamut" : ddg_dynamut
                                  , "ddg_encom" : ddg_encom
                                  , "ddg_mcsm" : ddg_mcsm
                                  , "ddg_sdm" : ddg_sdm
                                  , "ddg_duet" : ddg_duet
                                  , "dds_encom" : dds_encom
                                  }
                    results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T
                    print('Result DF:', results_df, 'for URL:', line)
                    #dynamut_results_df = dynamut_results_df.append(results_df)#!1 too many!:-)
                    dynamut_results_out_df = dynamut_results_out_df.append(results_df)
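                    # DataFrame.append() was removed in pandas 2.0; on current
                    # pandas the equivalent is:
                    # dynamut_results_out_df = pd.concat([dynamut_results_out_df, results_df])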
                    #print(dynamut_results_out_df)

    #============================
    # Writing results file: csv
    #============================
    dynamut_results_dir = output_dir + '/dynamut_results'
    if not os.path.exists(dynamut_results_dir):
        print('\nCreating dir: dynamut_results within:', output_dir)
        os.makedirs(dynamut_results_dir)

    print('\nWriting dynamut results df')
    print('\nResults File:'
          , '\nNo. of rows:', dynamut_results_out_df.shape[0]
          , '\nNo. of cols:', dynamut_results_out_df.shape[1])
    print(dynamut_results_out_df)
    #dynamut_results_out_df.to_csv('/tmp/test_dynamut.csv', index = False)

    # build out filename
    out_filename = dynamut_results_dir + '/dynamut_output_' + outfile_suffix + '.csv'
    dynamut_results_out_df.to_csv(out_filename, index = False)

result_response = requests.get(single_result_url)
if result_response.status_code == 200:
    print('Fetching results')
    # extract results using the html parser
    soup = BeautifulSoup(result_response.text, features = 'html.parser')
    #web_result_raw = soup.find(id = 'predictions').get_text()
    ddg_dynamut = soup.find(id = 'ddg_dynamut').get_text()
    ddg_encom = soup.find(id = 'ddg_encom').get_text()
    ddg_mcsm = soup.find(id = 'ddg_mcsm').get_text()
    ddg_sdm = soup.find(id = 'ddg_sdm').get_text()
    ddg_duet = soup.find(id = 'ddg_duet').get_text()
    dds_encom = soup.find(id = 'dds_encom').get_text()

    param_dict = {"mutationinformation" : snp
                  , "ddg_dynamut" : ddg_dynamut
                  , "ddg_encom" : ddg_encom
                  , "ddg_mcsm" : ddg_mcsm
                  , "ddg_sdm" : ddg_sdm
                  , "ddg_duet" : ddg_duet
                  , "dds_encom" : dds_encom
                  }
    results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T
    print(results_df)
    dynamut_results_df = dynamut_results_df.append(results_df)
    print(dynamut_results_df)

#%% Derive the single url from the batch result itself
# get request from a batch url
# corresponding to href
batch_result_url
batch_response = requests.get(batch_result_url)
batch_soup = BeautifulSoup(batch_response.text, features = 'html.parser')
print(batch_soup)
#table = batch_soup.find('table', attrs = {'class':'table table-striped table-bordered table-responsive'})
#btn = batch_soup.find_all(href = True, attrs = {'class':'btn btn-default btn-sm'})
#print(btn)
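# each mutation row on the batch page carries an anchor styled
# 'btn btn-default btn-sm'; the loop below follows those hrefs to the
# single-result pages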

# initialise empty df
dynamut_results_df = pd.DataFrame()
for a in batch_soup.find_all('a', href=True, attrs = {'class':'btn btn-default btn-sm'}):
    print("Found the URL:", a['href'])
    single_result_url = host + a['href']
    snp = re.search(r'([A-Z]+[0-9]+[A-Z]+$)', single_result_url).group(0)
    print(snp)
    print('\nGetting results from:', single_result_url)

    result_response = requests.get(single_result_url)
    if result_response.status_code == 200:
        print('\nFetching results for SNP:', snp)
        # extract results using the html parser
        soup = BeautifulSoup(result_response.text, features = 'html.parser')
        #web_result_raw = soup.find(id = 'predictions').get_text()
        ddg_dynamut = soup.find(id = 'ddg_dynamut').get_text()
        ddg_encom = soup.find(id = 'ddg_encom').get_text()
        ddg_mcsm = soup.find(id = 'ddg_mcsm').get_text()
        ddg_sdm = soup.find(id = 'ddg_sdm').get_text()
        ddg_duet = soup.find(id = 'ddg_duet').get_text()
        dds_encom = soup.find(id = 'dds_encom').get_text()

        param_dict = {"mutationinformation" : snp
                      , "ddg_dynamut" : ddg_dynamut
                      , "ddg_encom" : ddg_encom
                      , "ddg_mcsm" : ddg_mcsm
                      , "ddg_sdm" : ddg_sdm
                      , "ddg_duet" : ddg_duet
                      , "dds_encom" : dds_encom
                      }
        results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T
        print(results_df)
        dynamut_results_df = dynamut_results_df.append(results_df)
        print(dynamut_results_df)

print('\nWriting dynamut results df')
dynamut_results_df.to_csv('test_dynamut.csv', index = False)
print('\nResults File:'
      , '\nNo. of rows:', dynamut_results_df.shape[0]
      , '\nNo. of cols:', dynamut_results_df.shape[1])

#%%#####################################################################
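
# A minimal sketch of how get_results() might be driven; the url file name
# and argument values below are illustrative assumptions, not part of this
# script:
#
# get_results(url_file = 'dynamut_result_urls.txt'
#             , host_url = host
#             , output_dir = '.'
#             , outfile_suffix = 'test')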