updated with def for get_results.py for dynamut
This commit is contained in: parent 2e047fd548, commit 1d8e6f0d75
1 changed file with 68 additions and 10 deletions
@@ -18,16 +18,74 @@ from pandas.api.types import is_string_dtype
 from pandas.api.types import is_numeric_dtype
 #%%============================================================================
 homedir = os.path.expanduser('~')
-print(homedir)
+#print(homedir)
+
+host = 'http://biosig.unimelb.edu.au'
+
+# Needed if things try to block the 'requests' user agent
+#headers = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"}
+
+#%%
+def get_results(url_file):
+    # initialise empty df
+    dynamut_results_out_df = pd.DataFrame()
+    with open(url_file, 'r') as f:
+        for count, line in enumerate(f):
+            line = line.strip()
+            print('URL no.', count+1, '\n', line)
+            #batch_response = requests.get(line, headers = headers)
+            batch_response = requests.get(line)
+            batch_soup = BeautifulSoup(batch_response.text, features = 'html.parser')
+
+            # initialise empty df
+            #dynamut_results_df = pd.DataFrame()
+            for a in batch_soup.find_all('a', href = True, attrs = {'class': 'btn btn-default btn-sm'}):
+                print("Found the URL:", a['href'])
+                single_result_url = host + a['href']
+                snp = re.search(r'([A-Z]+[0-9]+[A-Z]+$)', single_result_url).group(0)
+                print(snp)
+                print('\nGetting results from:', single_result_url)
+
+                result_response = requests.get(single_result_url)
+                if result_response.status_code == 200:
+                    print('\nFetching results for SNP:', snp)
+                    # extract results using the html parser
+                    soup = BeautifulSoup(result_response.text, features = 'html.parser')
+                    #web_result_raw = soup.find(id = 'predictions').get_text()
+                    ddg_dynamut = soup.find(id = 'ddg_dynamut').get_text()
+                    ddg_encom = soup.find(id = 'ddg_encom').get_text()
+                    ddg_mcsm = soup.find(id = 'ddg_mcsm').get_text()
+                    ddg_sdm = soup.find(id = 'ddg_sdm').get_text()
+                    ddg_duet = soup.find(id = 'ddg_duet').get_text()
+                    dds_encom = soup.find(id = 'dds_encom').get_text()
+
+                    param_dict = {"mutationinformation" : snp
+                                  , "ddg_dynamut" : ddg_dynamut
+                                  , "ddg_encom" : ddg_encom
+                                  , "ddg_mcsm" : ddg_mcsm
+                                  , "ddg_sdm" : ddg_sdm
+                                  , "ddg_duet" : ddg_duet
+                                  , "dds_encom" : dds_encom
+                                  }
+                    results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T
+                    print('Result DF:', results_df, 'for URL:', line)
+                    #dynamut_results_df = dynamut_results_df.append(results_df) #!1 too many!:-)
+                    dynamut_results_out_df = dynamut_results_out_df.append(results_df)
+
+    #print(dynamut_results_out_df)
+    print('\nWriting dynamut results df')
+    print('\nResults File:'
+          , '\nNo. of rows:', dynamut_results_out_df.shape[0]
+          , '\nNo. of cols:', dynamut_results_out_df.shape[1])
+    print(dynamut_results_out_df)
+    dynamut_results_out_df.to_csv('/tmp/test_dynamut.csv', index = False)
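A note on the accumulation step above: DataFrame.append(), which get_results() uses to grow dynamut_results_out_df one SNP at a time, was deprecated in later pandas releases and removed in pandas 2.0. A minimal sketch of the pd.concat() pattern that replaces it, assuming a recent pandas; the two example frames and SNP labels below are invented purely for illustration:

import pandas as pd

# Hypothetical single-row frames, shaped like the results_df built from
# param_dict inside get_results()
frames = [
    pd.DataFrame({"mutationinformation": ["A123B"], "ddg_dynamut": ["0.1"]}),
    pd.DataFrame({"mutationinformation": ["C45D"], "ddg_dynamut": ["-0.2"]}),
]

# pd.concat() builds the combined table in one step instead of repeated appends
dynamut_results_out_df = pd.concat(frames, ignore_index=True)
print(dynamut_results_out_df)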
-my_mutation_list = homedir + '/git/LSHTM_analysis/dynamut/test_input/snp_test2.csv'
-
-text_file = open(my_mutation_list, 'r')
-lines = text_file .read().split('\n')
-print (lines)
-print(len(lines))
-
-def get_results(url_file
-                , mutation_list)
+#%%
+# example 1: multiple urls in a single file
+my_url_file_multiple = homedir + '/git/LSHTM_analysis/dynamut/dynamut_temp/dynamut_result_url_batch_multiple.txt'
+print(my_url_file_multiple)
+get_results(my_url_file_multiple)
+
+# example 2: single url in a file
+my_url_file_single = homedir + '/git/LSHTM_analysis/dynamut/dynamut_temp/dynamut_result_url_batch_single.txt'
+print(my_url_file_single)
+get_results(my_url_file_single)
+#%%
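One fragile point in the hunk above is the SNP extraction: re.search() returns None whenever a result link does not end in a wild-type/position/mutant token, and calling .group(0) on None raises AttributeError. A hedged sketch of a guarded variant of that step; extract_snp and the example URL are introduced here for illustration and are not part of the commit:

import re

def extract_snp(result_url):
    # Return the trailing mutation token (e.g. 'A123B'), or None if the URL
    # does not end with one
    match = re.search(r'([A-Z]+[0-9]+[A-Z]+$)', result_url)
    return match.group(0) if match else None

# usage sketch with a made-up URL: skip links that carry no mutation suffix
snp = extract_snp('http://biosig.unimelb.edu.au/dynamut/results/JOB_A123B')
if snp is None:
    print('No SNP token found in URL, skipping')
else:
    print('SNP:', snp)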
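Finally, if the DynaMut server ever starts blocking the default python-requests user agent, the commented-out headers dictionary in the hunk can simply be passed to each requests.get() call; a short sketch, with the timeout added here as an extra precaution rather than something the commit itself does:

import requests

headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) "
                         "AppleWebKit/537.36 (KHTML, like Gecko) "
                         "Chrome/83.0.4103.97 Safari/537.36"}

# hypothetical request against the DynaMut host used in the script
response = requests.get('http://biosig.unimelb.edu.au', headers=headers, timeout=60)
print(response.status_code)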