Reviewed revision of the dynamut result-scraping patch.

NOTE(review): the copy of this patch that was reviewed had its line breaks and
indentation collapsed. The layout below is reconstructed: indentation and the
position of blank lines in context and removed lines are best guesses and must
be confirmed against dynamut/get_results.py before applying. The 'index' lines
are kept as received, so their post-image ids describe the originally
committed content, not this revision.

Changes relative to the received patch (added lines only):
  * get_results.py, last hunk: rows are collected in a list and the df is
    built once (DataFrame.append() was removed in pandas 2.0); a url without
    a mutation, a non-200 page or a missing html element no longer crashes or
    is skipped silently; requests carry a timeout; the no-op expression,
    the full-page debug print and the commented-out code are dropped.
  * get_results_def.py: the input file is closed via a with-block and read
    with splitlines(); the unfinished 'def get_results' (no ':' and no body,
    a SyntaxError) is turned into a valid stub.

NOTE(review): host no longer ends in '/dynamut', so the hand-built single_url
and single_result_url lose that path segment compared with before the patch.
Confirm this is intended.

diff --git a/dynamut/get_results.py b/dynamut/get_results.py
index 6d30e79..5390e75 100755
--- a/dynamut/get_results.py
+++ b/dynamut/get_results.py
@@ -17,13 +17,13 @@
 import pandas as pd
 from pandas.api.types import is_string_dtype
 from pandas.api.types import is_numeric_dtype
 #%%============================================================================
-#streptomycin/gid_complex.pdb
-host = 'http://biosig.unimelb.edu.au/dynamut'
-pred_dynamut_batch = '/results_prediction/161287964015'
-result_id = re.search( r"([0-9]+)$", pred_dynamut).group(0)
-
+host = 'http://biosig.unimelb.edu.au'
+pred_dynamut_batch = '/dynamut/results_prediction/161287964015'
 batch_result_url = host + pred_dynamut_batch
+batch_result_url
+# build a single url with a given mutation
+result_id = re.search( r"([0-9]+)$", pred_dynamut_batch).group(0)
 mut = 'S2C'
 single_url = host + '/single_results/' + str(result_id)
 single_result_url = host + '/single_results/' + str(result_id) + '/' + mut
@@ -58,10 +58,8 @@ if result_response.status_code == 200:
     print(results_df)
 
 
-#%% for loop
-
-single_url = host + '/single_results/' + str(result_id)
-
+#%% looping over mutation
+single_url = host + '/single_results/' + str(result_id)
 muts = ["S2C", "S2F"]
 
 # initilialise empty df
@@ -100,4 +98,56 @@ for i, mut in enumerate(muts):
         dynamut_results_df = dynamut_results_df.append(results_df)
         print(dynamut_results_df)
 
-
+#%% Derive the single result urls from the batch result page itself
+# The batch page links each per-mutation result through an anchor of class
+# 'btn btn-default btn-sm'; every such href is appended to host and fetched.
+request_timeout = 60 # seconds, so that a stalled server cannot hang the script
+batch_response = requests.get(batch_result_url, timeout = request_timeout)
+# stop on an http error instead of scraping an error page
+batch_response.raise_for_status()
+batch_soup = BeautifulSoup(batch_response.text, features = 'html.parser')
+
+# ids of the html elements read from every single result page
+result_fields = ['ddg_dynamut', 'ddg_encom', 'ddg_mcsm', 'ddg_sdm', 'ddg_duet', 'dds_encom']
+
+# one dict per mutation: the df is built once after the loop because
+# DataFrame.append() is deprecated and was removed in pandas 2.0
+result_rows = []
+for a in batch_soup.find_all('a', href = True, attrs = {'class':'btn btn-default btn-sm'}):
+    print ("Found the URL:", a['href'])
+    single_result_url = host + a['href']
+
+    # the mutation is taken from the end of the url; skip links without one
+    snp_match = re.search(r'([A-Z]+[0-9]+[A-Z]+$)', single_result_url)
+    if snp_match is None:
+        print('\nSkipping, no mutation found in:', single_result_url)
+        continue
+    snp = snp_match.group(0)
+    print(snp)
+    print('\nGetting results from:', single_result_url)
+
+    result_response = requests.get(single_result_url, timeout = request_timeout)
+    if result_response.status_code != 200:
+        print('\nSkipping SNP:', snp, 'http status:', result_response.status_code)
+        continue
+
+    print('\nFetching results for SNP:', snp)
+    # extract results using the html parser
+    soup = BeautifulSoup(result_response.text, features = 'html.parser')
+    param_dict = {"mutationinformation" : snp}
+    for field in result_fields:
+        element = soup.find(id = field)
+        # an element missing from the page gives an empty value, not a crash
+        param_dict[field] = element.get_text() if element is not None else None
+    print(param_dict)
+    result_rows.append(param_dict)
+
+# passing the columns keeps the csv header even when no result was fetched
+dynamut_results_df = pd.DataFrame(result_rows, columns = ['mutationinformation'] + result_fields)
+print(dynamut_results_df)
+
+print('\nWriting dynamut results df')
+dynamut_results_df.to_csv('test_dynamut.csv', index = False)
+print('\nResults File:'
+      , '\nNo. of rows:', dynamut_results_df.shape[0]
+      , '\nNo. of cols:', dynamut_results_df.shape[1])
diff --git a/dynamut/get_results_def.py b/dynamut/get_results_def.py
new file mode 100644
index 0000000..10ed6aa
--- /dev/null
+++ b/dynamut/get_results_def.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Aug 19 14:33:51 2020
+
+@author: tanu
+"""
+#%% load packages
+import os,sys
+import subprocess
+import argparse
+import requests
+import re
+import time
+from bs4 import BeautifulSoup
+import pandas as pd
+from pandas.api.types import is_string_dtype
+from pandas.api.types import is_numeric_dtype
+#%%============================================================================
+homedir = os.path.expanduser('~')
+print(homedir)
+
+my_mutation_list = homedir + '/git/LSHTM_analysis/dynamut/test_input/snp_test2.csv'
+
+# one entry per line: the with-block closes the file and splitlines() does not
+# leave the empty last entry that split('\n') gives when the file ends in a newline
+with open(my_mutation_list, 'r') as text_file:
+    lines = text_file.read().splitlines()
+print (lines)
+print(len(lines))
+
+
+def get_results(url_file
+                , mutation_list):
+    """Placeholder: not implemented yet.
+
+    NOTE(review): this definition was committed without ':' and without a
+    body, which is a SyntaxError for the whole file. What it should do is
+    not visible here, so it only raises until it is implemented.
+    """
+    raise NotImplementedError('get_results() is not implemented yet')