#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 19 14:33:51 2020

@author: tanu

Scrape per-mutation prediction scores from DynaMut result pages.

The script runs as three cells:
  1. fetch the scores for one hard-coded mutation,
  2. loop over a hard-coded list of mutations,
  3. follow every single-result link found on the batch result page and
     write the combined table to 'test_dynamut.csv'.
"""
#%% load packages
import os
import sys
import subprocess
import argparse
import requests
import re
import time
from bs4 import BeautifulSoup
import pandas as pd
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype

#%% shared constants and helpers
# Seconds to wait for the server. requests has no default timeout, so
# without this a stalled connection would block the script forever.
REQUEST_TIMEOUT = 60

# HTML element ids holding the scores on a single-result page.
SCORE_IDS = ['ddg_dynamut', 'ddg_encom', 'ddg_mcsm', 'ddg_sdm', 'ddg_duet',
             'dds_encom']

# Column order of the output table.
RESULT_COLUMNS = ['mutationinformation'] + SCORE_IDS

# Mutation code at the end of a single-result url, e.g. 'S2C'.
MUTATION_RE = re.compile(r'([A-Z]+[0-9]+[A-Z]+$)')


def _extract_scores(html, snp):
    """Parse one single-result page into a dict of scores.

    Parameters
    ----------
    html : str
        Body of the single-result page.
    snp : str
        Mutation the page belongs to; stored under 'mutationinformation'.

    Returns
    -------
    dict
        Keys are RESULT_COLUMNS. Each score is the text of the element whose
        id matches the key, or None (with a warning printed) when the page
        has no such element.
    """
    soup = BeautifulSoup(html, features = 'html.parser')
    scores = {"mutationinformation": snp}
    for score_id in SCORE_IDS:
        tag = soup.find(id = score_id)
        if tag is None:
            # find() returns None for a missing id; keep the row and leave
            # a gap instead of crashing the whole run on one bad page.
            print('WARNING: no element with id', score_id, 'for', snp)
            scores[score_id] = None
        else:
            scores[score_id] = tag.get_text()
    return scores


#%%============================================================================
host = 'http://biosig.unimelb.edu.au'
pred_dynamut_batch = '/dynamut/results_prediction/161287964015'
batch_result_url = host + pred_dynamut_batch

# build a single url with a given mutation
result_id = re.search(r"([0-9]+)$", pred_dynamut_batch).group(0)
mut = 'S2C'
single_url = host + '/single_results/' + str(result_id)
single_result_url = host + '/single_results/' + str(result_id) + '/' + mut
print(single_result_url)

#%%============================================================================
param_dict = {}
result_response = requests.get(single_result_url, timeout = REQUEST_TIMEOUT)
if result_response.status_code == 200:
    print('Fetching results')
    # extract results using the html parser
    param_dict = _extract_scores(result_response.text, mut)
    results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T
    print(results_df)
else:
    print('WARNING: HTTP', result_response.status_code, 'for',
          single_result_url)

#%% looping over mutation
single_url = host + '/single_results/' + str(result_id)
muts = ["S2C", "S2F"]

# Collect one dict per mutation and build the table once at the end:
# DataFrame.append was removed in pandas 2.0.
result_rows = []
for i, mut in enumerate(muts):
    print('Running mutation', i+1, ':', mut)
    snp = mut
    single_result_url = single_url + '/' + snp
    print('Getting results from:', single_result_url)

    result_response = requests.get(single_result_url,
                                   timeout = REQUEST_TIMEOUT)
    if result_response.status_code != 200:
        print('WARNING: HTTP', result_response.status_code, 'for',
              single_result_url)
        continue

    print('Fetching results')
    param_dict = _extract_scores(result_response.text, snp)
    results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T
    print(results_df)
    result_rows.append(param_dict)

dynamut_results_df = pd.DataFrame(result_rows, columns = RESULT_COLUMNS)
print(dynamut_results_df)

#%% Derive the single url from the batch result itself
# get request from a batch url
batch_response = requests.get(batch_result_url, timeout = REQUEST_TIMEOUT)
if batch_response.status_code != 200:
    print('WARNING: HTTP', batch_response.status_code, 'for',
          batch_result_url)
batch_soup = BeautifulSoup(batch_response.text, features = 'html.parser')
print(batch_soup)

result_rows = []
# every single-result page is linked from the batch page through a button
for a in batch_soup.find_all('a', href=True
                             , attrs = {'class':'btn btn-default btn-sm'}):
    print ("Found the URL:", a['href'])
    single_result_url = host + a['href']

    snp_match = MUTATION_RE.search(single_result_url)
    if snp_match is None:
        # a button that does not point at a mutation result; skip it
        print('WARNING: no mutation found in url, skipping:',
              single_result_url)
        continue
    snp = snp_match.group(0)
    print(snp)

    print('\nGetting results from:', single_result_url)
    result_response = requests.get(single_result_url,
                                   timeout = REQUEST_TIMEOUT)
    if result_response.status_code != 200:
        print('WARNING: HTTP', result_response.status_code, 'for',
              single_result_url)
        continue

    print('\nFetching results for SNP:', snp)
    param_dict = _extract_scores(result_response.text, snp)
    results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T
    print(results_df)
    result_rows.append(param_dict)

# Passing the columns explicitly keeps the header even when nothing was
# fetched, so the csv is never a bare empty file.
dynamut_results_df = pd.DataFrame(result_rows, columns = RESULT_COLUMNS)
print(dynamut_results_df)

print('\nWriting dynamut results df')
dynamut_results_df.to_csv('test_dynamut.csv', index = False)
print('\nResults File:'
      , '\nNo. of rows:', dynamut_results_df.shape[0]
      , '\nNo. of cols:', dynamut_results_df.shape[1])