#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 19 14:33:51 2020

@author: tanu

Scrape DynaMut single-mutation result pages into a pandas dataframe.

Source of this file: git patch d26b17fd1966d3e0dd04de96260a6d9d730c754d
("[PATCH] added dynamut dir", Tanushree Tunstall, Tue, 9 Feb 2021
16:11:07 +0000). That patch adds dynamut/dynamut.py (imports and the work
plan kept below) and dynamut/dynamut_test.py (the scraping script kept
below).
"""


#%% load packages
import os
import sys
import subprocess
import argparse
import re
import time

import pandas as pd
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype
# NOTE: requests and bs4 are imported inside the functions that use them so
# that the pure helpers stay importable on a machine without those packages.
#%%============================================================================
# Work plan (from dynamut/dynamut.py)

#1) define muts batch
# take mcsm file
# split into 'n' batches
# write output file with suffix of batch number


#********** done this part ****************
#2) get results for a batch url
# read file
# store batch url
# extract number
# build single url
# build single results urls
# get results and store them in df
# update df
# dim of df = no. of muts in batch

#3) format results
# store unit measurements separately
# omit unit measurements from cols
# create extra columns '_outcome' suffix by splitting numerical output
# create separate col for mcsm as it doesn't have output text

#%%============================================================================
host_dynamut = 'http://biosig.unimelb.edu.au/dynamut'
batch_result_url = 'http://biosig.unimelb.edu.au/dynamut/results_prediction/15955901077'

mut = 'S104R'
muts = ["S104R", "G24R"]

# ids of the HTML elements read from a single-result page; each one becomes
# a column of the results dataframe
PREDICTION_IDS = ("ddg_dynamut", "ddg_encom", "ddg_mcsm", "ddg_sdm",
                  "ddg_duet", "dds_encom")
RESULT_COLUMNS = ("mutationinformation",) + PREDICTION_IDS

# seconds to wait for the server before giving up instead of hanging forever
REQUEST_TIMEOUT = 60


#%%============================================================================
def extract_batch_number(batch_url):
    """Return the run of digits that ends *batch_url*.

    Raises ValueError when the url does not end in digits.
    """
    match = re.search(r'([0-9]+)$', batch_url)
    if match is None:
        raise ValueError('No batch number at the end of url: ' + batch_url)
    return match.group(0)


def build_single_result_url(batch_number, mutation, host=host_dynamut):
    """Return the single-result page url for *mutation* in batch *batch_number*."""
    return host + '/single_results/' + batch_number + '/' + mutation


def parse_single_result(html, mutation):
    """Return a dict with the mutation and the text of every prediction element.

    Keys are the entries of RESULT_COLUMNS. An element that is absent from
    *html* is reported and stored as None instead of raising AttributeError.
    """
    from bs4 import BeautifulSoup

    # extract results using the html parser
    soup = BeautifulSoup(html, features='html.parser')
    row = {"mutationinformation": mutation}
    for element_id in PREDICTION_IDS:
        element = soup.find(id=element_id)
        if element is None:
            print('Warning: no element with id', element_id, 'for', mutation)
            row[element_id] = None
        else:
            row[element_id] = element.get_text()
    return row


def fetch_single_result(single_result_url, mutation, timeout=REQUEST_TIMEOUT):
    """Download one single-result page and return its parsed predictions.

    Returns None (after printing the status code) when the server does not
    answer with HTTP 200. Connection errors and timeouts raised by requests
    are not caught here.
    """
    import requests

    result_response = requests.get(single_result_url, timeout=timeout)
    if result_response.status_code != 200:
        print('Skipping', mutation, ': HTTP status',
              result_response.status_code, 'from', single_result_url)
        return None
    print('Fetching results')
    return parse_single_result(result_response.text, mutation)


def results_to_df(rows):
    """Return one dataframe with a row per prediction dict in *rows*.

    Columns are always RESULT_COLUMNS, also for an empty *rows*. The frame is
    built in one go because DataFrame.append does not exist in pandas >= 2.0.
    """
    return pd.DataFrame(list(rows), columns=list(RESULT_COLUMNS))


def fetch_batch_results(batch_url, mutations, host=host_dynamut):
    """Fetch the single-result page of every mutation of a batch.

    Mutations whose page cannot be fetched are skipped. Returns a dataframe
    with one row per successfully fetched mutation.
    """
    batch_number = extract_batch_number(batch_url)
    rows = []
    for i, mutation in enumerate(mutations):
        print('Running mutation', i, ':', mutation)
        url = build_single_result_url(batch_number, mutation, host)
        print('Getting results from:', url)
        row = fetch_single_result(url, mutation)
        if row is None:
            continue
        print(results_to_df([row]))
        rows.append(row)
    return results_to_df(rows)


#%%============================================================================
# urls derived from the batch url (string handling only, no network access)
batch_url_number = extract_batch_number(batch_result_url)
single_url = host_dynamut + '/single_results/' + batch_url_number
single_result_url = build_single_result_url(batch_url_number, mut)

if __name__ == "__main__":
    # single mutation
    param_dict = fetch_single_result(single_result_url, mut) or {}
    if param_dict:
        results_df = results_to_df([param_dict])

    # all mutations of the batch
    dynamut_results_df = fetch_batch_results(batch_result_url, muts)
    print(dynamut_results_df)