separated defs and calls and added a separate script to test examples
This commit is contained in:
parent
6c458f8883
commit
deb0aa8e58
13 changed files with 281 additions and 517 deletions
|
@ -1,46 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Created on Wed Aug 19 14:33:51 2020
|
|
||||||
|
|
||||||
@author: tanu
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
#%% load packages
|
|
||||||
import os,sys
|
|
||||||
import subprocess
|
|
||||||
import argparse
|
|
||||||
import requests
|
|
||||||
import re
|
|
||||||
import time
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
import pandas as pd
|
|
||||||
from pandas.api.types import is_string_dtype
|
|
||||||
from pandas.api.types import is_numeric_dtype
|
|
||||||
#%%============================================================================
|
|
||||||
|
|
||||||
#1) define muts batch
|
|
||||||
#take mcsm file
|
|
||||||
#split into 'n' batches
|
|
||||||
#write output file with suffix of batch number
|
|
||||||
|
|
||||||
|
|
||||||
#********** done this part ****************
|
|
||||||
#2) get results for a batch url
|
|
||||||
# read file
|
|
||||||
# store batch url
|
|
||||||
#extract number
|
|
||||||
#build single url
|
|
||||||
#build single results urls
|
|
||||||
#get results and store them in df
|
|
||||||
#update df
|
|
||||||
#dim of df = no. of muts in batch
|
|
||||||
|
|
||||||
#3) format results
|
|
||||||
# store unit measurements separately
|
|
||||||
# omit unit measurements from cols
|
|
||||||
# create extra columns '_outcome' suffix by splitting numerical output
|
|
||||||
# create separate col for mcsm as it doesn't have output text
|
|
||||||
|
|
||||||
#%%============================================================================
|
|
7
dynamut/dynamut_results/dynamut_output_multiple.csv
Normal file
7
dynamut/dynamut_results/dynamut_output_multiple.csv
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
mutationinformation,ddg_dynamut,ddg_encom,ddg_mcsm,ddg_sdm,ddg_duet,dds_encom
|
||||||
|
G13V,0.006 kcal/mol (Stabilizing),-0.053 kcal/mol (Destabilizing),-0.261 kcal/mol (Destabilizing),-0.120 kcal/mol (Destabilizing),0.120 kcal/mol (Stabilizing),0.066 kcal.mol-1.K-1 (Increase of molecule flexibility)
|
||||||
|
A19T,-0.077 kcal/mol (Destabilizing),0.224 kcal/mol (Destabilizing),-0.631 kcal/mol (Destabilizing),-2.620 kcal/mol (Destabilizing),-0.758 kcal/mol (Destabilizing),-0.280 kcal.mol-1.K-1 (Decrease of molecule flexibility)
|
||||||
|
I4N,-0.239 kcal/mol (Destabilizing),-0.720 kcal/mol (Destabilizing),-0.728 kcal/mol (Destabilizing),-0.550 kcal/mol (Destabilizing),-0.461 kcal/mol (Destabilizing),0.900 kcal.mol-1.K-1 (Increase of molecule flexibility)
|
||||||
|
P3S,0.727 kcal/mol (Stabilizing),0.334 kcal/mol (Destabilizing),-0.672 kcal/mol (Destabilizing),0.010 kcal/mol (Stabilizing),-0.252 kcal/mol (Destabilizing),-0.418 kcal.mol-1.K-1 (Decrease of molecule flexibility)
|
||||||
|
F12S,-0.270 kcal/mol (Destabilizing),0.048 kcal/mol (Destabilizing),-1.028 kcal/mol (Destabilizing),-0.930 kcal/mol (Destabilizing),-0.993 kcal/mol (Destabilizing),-0.060 kcal.mol-1.K-1 (Decrease of molecule flexibility)
|
||||||
|
A19V,2.389 kcal/mol (Stabilizing),0.450 kcal/mol (Destabilizing),0.659 kcal/mol (Stabilizing),-0.170 kcal/mol (Destabilizing),1.040 kcal/mol (Stabilizing),-0.562 kcal.mol-1.K-1 (Decrease of molecule flexibility)
|
|
3
dynamut/dynamut_results/dynamut_output_single.csv
Normal file
3
dynamut/dynamut_results/dynamut_output_single.csv
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
mutationinformation,ddg_dynamut,ddg_encom,ddg_mcsm,ddg_sdm,ddg_duet,dds_encom
|
||||||
|
G13V,0.006 kcal/mol (Stabilizing),-0.053 kcal/mol (Destabilizing),-0.261 kcal/mol (Destabilizing),-0.120 kcal/mol (Destabilizing),0.120 kcal/mol (Stabilizing),0.066 kcal.mol-1.K-1 (Increase of molecule flexibility)
|
||||||
|
A19T,-0.077 kcal/mol (Destabilizing),0.224 kcal/mol (Destabilizing),-0.631 kcal/mol (Destabilizing),-2.620 kcal/mol (Destabilizing),-0.758 kcal/mol (Destabilizing),-0.280 kcal.mol-1.K-1 (Decrease of molecule flexibility)
|
|
2
dynamut/example_input/snp_test1.csv
Normal file
2
dynamut/example_input/snp_test1.csv
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
F12S
|
||||||
|
A19V
|
|
2
dynamut/example_input/snp_test2.csv
Normal file
2
dynamut/example_input/snp_test2.csv
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
G13V
|
||||||
|
A19T
|
|
71
dynamut/examples.py
Normal file
71
dynamut/examples.py
Normal file
|
@ -0,0 +1,71 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
Created on Fri Feb 12 12:15:26 2021
|
||||||
|
|
||||||
|
@author: tanu
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
homedir = os.path.expanduser('~')
|
||||||
|
os.chdir (homedir + '/git/LSHTM_analysis/dynamut')
|
||||||
|
from get_results import *
|
||||||
|
from submit import *
|
||||||
|
#%%#####################################################################
|
||||||
|
#EXAMPLE RUN for different stages
|
||||||
|
#=====================
|
||||||
|
# STAGE: submit.py
|
||||||
|
#=====================
|
||||||
|
my_host = 'http://biosig.unimelb.edu.au'
|
||||||
|
# build the prediction endpoint from my_host (the host variable this script defines)
my_prediction_url = f"{my_host}/dynamut/prediction_list"
|
||||||
|
# echo the endpoint that is passed to submit_dynamut() as prediction_url
print(my_prediction_url)
|
||||||
|
|
||||||
|
my_outdir = homedir + '/git/LSHTM_analysis/dynamut'
|
||||||
|
my_chain = 'A'
|
||||||
|
my_email = 'tanushree.tunstall@lshtm.ac.uk'
|
||||||
|
|
||||||
|
my_pdb_file = homedir + '/git/Data/streptomycin/input/gid_complex.pdb'
|
||||||
|
my_mutation_list = homedir + '/git/LSHTM_analysis/dynamut/example_input/snp_test1.csv'
|
||||||
|
my_suffix = 'gid_test1'
|
||||||
|
|
||||||
|
#----------------------------------------------
|
||||||
|
# example 1: 2 snps in a file
|
||||||
|
#----------------------------------------------
|
||||||
|
submit_dynamut(host_url = my_host
|
||||||
|
, pdb_file = my_pdb_file
|
||||||
|
, mutation_list = my_mutation_list
|
||||||
|
, chain = my_chain
|
||||||
|
, email_address = my_email
|
||||||
|
, prediction_url = my_prediction_url
|
||||||
|
, output_dir = my_outdir
|
||||||
|
, outfile_suffix = my_suffix)
|
||||||
|
#%%###################################################################
|
||||||
|
#=====================
|
||||||
|
# STAGE:get_results.py
|
||||||
|
#=====================
|
||||||
|
my_host = 'http://biosig.unimelb.edu.au'
|
||||||
|
my_outdir = homedir + '/git/LSHTM_analysis/dynamut'
|
||||||
|
|
||||||
|
#----------------------------------------------
|
||||||
|
# example 1: multiple urls in a single file
|
||||||
|
#----------------------------------------------
|
||||||
|
my_url_file_multiple = homedir + '/git/LSHTM_analysis/dynamut/dynamut_temp/dynamut_result_url_batch_multiple.txt'
|
||||||
|
print(my_url_file_multiple)
|
||||||
|
my_suffix = 'multiple'
|
||||||
|
|
||||||
|
get_results(url_file = my_url_file_multiple
|
||||||
|
, host_url = my_host
|
||||||
|
, output_dir = my_outdir
|
||||||
|
, outfile_suffix = my_suffix)
|
||||||
|
|
||||||
|
#----------------------------------------------
|
||||||
|
# example 2: single url in a file
|
||||||
|
#----------------------------------------------
|
||||||
|
my_url_file_single = homedir + '/git/LSHTM_analysis/dynamut/dynamut_temp/dynamut_result_url_batch_single.txt'
|
||||||
|
# echo the url file used by this (single url) example
print(my_url_file_single)
|
||||||
|
my_suffix = 'single'
|
||||||
|
|
||||||
|
get_results(my_url_file_single
|
||||||
|
, host_url = my_host
|
||||||
|
, output_dir = my_outdir
|
||||||
|
, outfile_suffix = my_suffix)
|
||||||
|
#%%###################################################################
|
194
dynamut/get_results.py
Executable file → Normal file
194
dynamut/get_results.py
Executable file → Normal file
|
@ -16,140 +16,68 @@ from bs4 import BeautifulSoup
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from pandas.api.types import is_string_dtype
|
from pandas.api.types import is_string_dtype
|
||||||
from pandas.api.types import is_numeric_dtype
|
from pandas.api.types import is_numeric_dtype
|
||||||
#%%============================================================================
|
#%%#####################################################################
|
||||||
host = 'http://biosig.unimelb.edu.au'
|
def get_results(url_file, host_url, output_dir, outfile_suffix):
|
||||||
pred_dynamut_batch = '/dynamut/results_prediction/161287964015'
|
# initialise empty df
|
||||||
batch_result_url = host + pred_dynamut_batch
|
dynamut_results_out_df = pd.DataFrame()
|
||||||
batch_result_url
|
with open(url_file, 'r') as f:
|
||||||
|
for count, line in enumerate(f):
|
||||||
# build a single url with a given mutation
|
line = line.strip()
|
||||||
result_id = re.search( r"([0-9]+)$", pred_dynamut_batch).group(0)
|
print('URL no.', count+1, '\n', line)
|
||||||
mut = 'S2C'
|
#batch_response = requests.get(line, headers=headers)
|
||||||
single_url = host + '/single_results/' + str(result_id)
|
batch_response = requests.get(line)
|
||||||
single_result_url = host + '/single_results/' + str(result_id) + '/' + mut
|
batch_soup = BeautifulSoup(batch_response.text, features = 'html.parser')
|
||||||
print(single_result_url)
|
|
||||||
|
|
||||||
#%%============================================================================
|
|
||||||
param_dict = {}
|
|
||||||
|
|
||||||
result_response = requests.get(single_result_url)
|
|
||||||
if result_response.status_code == 200:
|
|
||||||
print('Fetching results')
|
|
||||||
# extract results using the html parser
|
|
||||||
soup = BeautifulSoup(result_response.text, features = 'html.parser')
|
|
||||||
#web_result_raw = soup.find(id = 'predictions').get_text()
|
|
||||||
ddg_dynamut = soup.find(id = 'ddg_dynamut').get_text()
|
|
||||||
ddg_encom = soup.find(id = 'ddg_encom').get_text()
|
|
||||||
ddg_mcsm = soup.find(id = 'ddg_mcsm').get_text()
|
|
||||||
ddg_sdm = soup.find(id = 'ddg_sdm').get_text()
|
|
||||||
ddg_duet = soup.find(id = 'ddg_duet').get_text()
|
|
||||||
dds_encom = soup.find(id = 'dds_encom').get_text()
|
|
||||||
|
|
||||||
param_dict = {"mutationinformation" : mut
|
|
||||||
, "ddg_dynamut" : ddg_dynamut
|
|
||||||
, "ddg_encom" : ddg_encom
|
|
||||||
, "ddg_mcsm" : ddg_mcsm
|
|
||||||
, "ddg_sdm" : ddg_sdm
|
|
||||||
, "ddg_duet" : ddg_duet
|
|
||||||
, "dds_encom" : dds_encom
|
|
||||||
|
|
||||||
}
|
|
||||||
results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T
|
|
||||||
|
|
||||||
print(results_df)
|
|
||||||
|
|
||||||
#%% looping over mutation
|
|
||||||
single_url = host + '/single_results/' + str(result_id)
|
|
||||||
muts = ["S2C", "S2F"]
|
|
||||||
|
|
||||||
# initilialise empty df
|
|
||||||
dynamut_results_df = pd.DataFrame()
|
|
||||||
|
|
||||||
for i, mut in enumerate(muts):
|
|
||||||
#param_dict = {}
|
|
||||||
print('Running mutation', i+1, ':', mut)
|
|
||||||
snp = mut
|
|
||||||
single_result_url = single_url + '/' + snp
|
|
||||||
print('Getting results from:', single_result_url)
|
|
||||||
|
|
||||||
result_response = requests.get(single_result_url)
|
|
||||||
if result_response.status_code == 200:
|
|
||||||
print('Fetching results')
|
|
||||||
# extract results using the html parser
|
|
||||||
soup = BeautifulSoup(result_response.text, features = 'html.parser')
|
|
||||||
#web_result_raw = soup.find(id = 'predictions').get_text()
|
|
||||||
ddg_dynamut = soup.find(id = 'ddg_dynamut').get_text()
|
|
||||||
ddg_encom = soup.find(id = 'ddg_encom').get_text()
|
|
||||||
ddg_mcsm = soup.find(id = 'ddg_mcsm').get_text()
|
|
||||||
ddg_sdm = soup.find(id = 'ddg_sdm').get_text()
|
|
||||||
ddg_duet = soup.find(id = 'ddg_duet').get_text()
|
|
||||||
dds_encom = soup.find(id = 'dds_encom').get_text()
|
|
||||||
|
|
||||||
param_dict = {"mutationinformation" : snp
|
|
||||||
, "ddg_dynamut" : ddg_dynamut
|
|
||||||
, "ddg_encom" : ddg_encom
|
|
||||||
, "ddg_mcsm" : ddg_mcsm
|
|
||||||
, "ddg_sdm" : ddg_sdm
|
|
||||||
, "ddg_duet" : ddg_duet
|
|
||||||
, "dds_encom" : dds_encom
|
|
||||||
}
|
|
||||||
results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T
|
|
||||||
print(results_df)
|
|
||||||
dynamut_results_df = dynamut_results_df.append(results_df)
|
|
||||||
print(dynamut_results_df)
|
|
||||||
|
|
||||||
#%% Derive the single url from the batch result itself
|
|
||||||
# get request from a batch url
|
|
||||||
# corresponding to href
|
|
||||||
batch_result_url
|
|
||||||
batch_response = requests.get(batch_result_url)
|
|
||||||
batch_soup = BeautifulSoup(batch_response.text, features = 'html.parser')
|
|
||||||
print(batch_soup)
|
|
||||||
#table = batch_soup.find('table', attrs = {'class':'table table-striped table-bordered table-responsive'})
|
|
||||||
#btn = batch_soup.find_all(href = True, attrs = {'class':'btn btn-default btn-sm'})
|
|
||||||
#print(btn)
|
|
||||||
|
|
||||||
|
|
||||||
# initilialise empty df
|
|
||||||
dynamut_results_df = pd.DataFrame()
|
|
||||||
for a in batch_soup.find_all('a', href=True, attrs = {'class':'btn btn-default btn-sm'}):
|
|
||||||
print ("Found the URL:", a['href'])
|
|
||||||
single_result_url = host + a['href']
|
|
||||||
snp = re.search(r'([A-Z]+[0-9]+[A-Z]+$)', single_result_url).group(0)
|
|
||||||
print(snp)
|
|
||||||
print('\nGetting results from:', single_result_url)
|
|
||||||
|
|
||||||
result_response = requests.get(single_result_url)
|
|
||||||
if result_response.status_code == 200:
|
|
||||||
print('\nFetching results for SNP:', snp)
|
|
||||||
# extract results using the html parser
|
|
||||||
soup = BeautifulSoup(result_response.text, features = 'html.parser')
|
|
||||||
#web_result_raw = soup.find(id = 'predictions').get_text()
|
|
||||||
ddg_dynamut = soup.find(id = 'ddg_dynamut').get_text()
|
|
||||||
ddg_encom = soup.find(id = 'ddg_encom').get_text()
|
|
||||||
ddg_mcsm = soup.find(id = 'ddg_mcsm').get_text()
|
|
||||||
ddg_sdm = soup.find(id = 'ddg_sdm').get_text()
|
|
||||||
ddg_duet = soup.find(id = 'ddg_duet').get_text()
|
|
||||||
dds_encom = soup.find(id = 'dds_encom').get_text()
|
|
||||||
|
|
||||||
param_dict = {"mutationinformation" : snp
|
|
||||||
, "ddg_dynamut" : ddg_dynamut
|
|
||||||
, "ddg_encom" : ddg_encom
|
|
||||||
, "ddg_mcsm" : ddg_mcsm
|
|
||||||
, "ddg_sdm" : ddg_sdm
|
|
||||||
, "ddg_duet" : ddg_duet
|
|
||||||
, "dds_encom" : dds_encom
|
|
||||||
}
|
|
||||||
results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T
|
|
||||||
print(results_df)
|
|
||||||
dynamut_results_df = dynamut_results_df.append(results_df)
|
|
||||||
print(dynamut_results_df)
|
|
||||||
print('\nWriting dynamut results df')
|
|
||||||
dynamut_results_df.to_csv('test_dynamut.csv', index = False)
|
|
||||||
print('\nResults File:'
|
|
||||||
, '\nNo. of rows:', dynamut_results_df.shape[0]
|
|
||||||
, '\nNo. of cols:', dynamut_results_df.shape[1])
|
|
||||||
|
|
||||||
|
# initialise empty df
|
||||||
|
#dynamut_results_df = pd.DataFrame()
|
||||||
|
for a in batch_soup.find_all('a', href=True, attrs = {'class':'btn btn-default btn-sm'}):
|
||||||
|
print ("Found the URL:", a['href'])
|
||||||
|
single_result_url = host_url + a['href']
|
||||||
|
snp = re.search(r'([A-Z]+[0-9]+[A-Z]+$)', single_result_url).group(0)
|
||||||
|
print(snp)
|
||||||
|
print('\nGetting results from:', single_result_url)
|
||||||
|
|
||||||
|
result_response = requests.get(single_result_url)
|
||||||
|
if result_response.status_code == 200:
|
||||||
|
print('\nFetching results for SNP:', snp)
|
||||||
|
# extract results using the html parser
|
||||||
|
soup = BeautifulSoup(result_response.text, features = 'html.parser')
|
||||||
|
#web_result_raw = soup.find(id = 'predictions').get_text()
|
||||||
|
ddg_dynamut = soup.find(id = 'ddg_dynamut').get_text()
|
||||||
|
ddg_encom = soup.find(id = 'ddg_encom').get_text()
|
||||||
|
ddg_mcsm = soup.find(id = 'ddg_mcsm').get_text()
|
||||||
|
ddg_sdm = soup.find(id = 'ddg_sdm').get_text()
|
||||||
|
ddg_duet = soup.find(id = 'ddg_duet').get_text()
|
||||||
|
dds_encom = soup.find(id = 'dds_encom').get_text()
|
||||||
|
|
||||||
|
param_dict = {"mutationinformation" : snp
|
||||||
|
, "ddg_dynamut" : ddg_dynamut
|
||||||
|
, "ddg_encom" : ddg_encom
|
||||||
|
, "ddg_mcsm" : ddg_mcsm
|
||||||
|
, "ddg_sdm" : ddg_sdm
|
||||||
|
, "ddg_duet" : ddg_duet
|
||||||
|
, "dds_encom" : dds_encom
|
||||||
|
}
|
||||||
|
results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T
|
||||||
|
print('Result DF:', results_df, 'for URL:', line)
|
||||||
|
#dynamut_results_df = dynamut_results_df.append(results_df)#!1 too many!:-)
|
||||||
|
dynamut_results_out_df = dynamut_results_out_df.append(results_df)
|
||||||
|
#print(dynamut_results_out_df)
|
||||||
|
#============================
|
||||||
|
# Writing results file: csv
|
||||||
|
#============================
|
||||||
|
dynamut_results_dir = output_dir + '/dynamut_results'
|
||||||
|
if not os.path.exists(dynamut_results_dir):
|
||||||
|
print('\nCreating dir: dynamut_results within:', output_dir )
|
||||||
|
os.makedirs(dynamut_results_dir)
|
||||||
|
print('\nWriting dynamut results df')
|
||||||
|
print('\nResults File:'
|
||||||
|
, '\nNo. of rows:', dynamut_results_out_df.shape[0]
|
||||||
|
, '\nNo. of cols:', dynamut_results_out_df.shape[1])
|
||||||
|
print(dynamut_results_out_df)
|
||||||
|
#dynamut_results_out_df.to_csv('/tmp/test_dynamut.csv', index = False)
|
||||||
|
|
||||||
|
# build out filename
|
||||||
|
out_filename = dynamut_results_dir + '/dynamut_output_' + outfile_suffix + '.csv'
|
||||||
|
dynamut_results_out_df.to_csv(out_filename, index = False)
|
||||||
|
#%%#####################################################################
|
||||||
|
|
|
@ -1,108 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Created on Wed Aug 19 14:33:51 2020
|
|
||||||
|
|
||||||
@author: tanu
|
|
||||||
"""
|
|
||||||
#%% load packages
|
|
||||||
import os,sys
|
|
||||||
import subprocess
|
|
||||||
import argparse
|
|
||||||
import requests
|
|
||||||
import re
|
|
||||||
import time
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
import pandas as pd
|
|
||||||
from pandas.api.types import is_string_dtype
|
|
||||||
from pandas.api.types import is_numeric_dtype
|
|
||||||
#%%
|
|
||||||
def get_results(url_file, host_url, output_dir, outfile_suffix):
    """
    Scrapes the dynamut predictions linked from each batch result url listed
    in a file and writes all of them to a single csv.

    @param url_file: path to a text file containing one batch result url per line
    @type string

    @param host_url: host prefix prepended to every per-mutation link (href)
                     found on a batch result page
    @type string

    @param output_dir: dir within which the 'dynamut_results' dir is created
    @type string

    @param outfile_suffix: suffix used to build the output csv filename
    @type string

    @return None, writes
            <output_dir>/dynamut_results/dynamut_output_<outfile_suffix>.csv
            with one row per mutation whose result page returned status 200
    """
    # ids of the html elements holding each prediction; reused as csv columns
    result_ids = ['ddg_dynamut', 'ddg_encom', 'ddg_mcsm', 'ddg_sdm', 'ddg_duet', 'dds_encom']
    out_cols = ['mutationinformation'] + result_ids

    # rows are collected in a list and the df is built once at the end:
    # DataFrame.append() no longer exists in pandas >= 2.0
    result_rows = []

    with open(url_file, 'r') as f:
        for count, line in enumerate(f):
            line = line.strip()
            if not line:
                # blank line (e.g. trailing newline): there is no url to request
                continue
            print('URL no.', count+1, '\n', line)
            #batch_response = requests.get(line, headers=headers)
            batch_response = requests.get(line)
            batch_soup = BeautifulSoup(batch_response.text, features = 'html.parser')

            # every matching button on the batch page links to one mutation's result page
            for a in batch_soup.find_all('a', href=True, attrs = {'class':'btn btn-default btn-sm'}):
                print ("Found the URL:", a['href'])
                single_result_url = host_url + a['href']

                # the mutation (e.g. S2C) is taken from the end of the url
                snp_match = re.search(r'([A-Z]+[0-9]+[A-Z]+$)', single_result_url)
                if snp_match is None:
                    print('\nSkipping url that does not end in a mutation:', single_result_url)
                    continue
                snp = snp_match.group(0)
                print(snp)
                print('\nGetting results from:', single_result_url)

                result_response = requests.get(single_result_url)
                if result_response.status_code != 200:
                    print('\nFAIL: status code', result_response.status_code
                          , 'for SNP:', snp, '- no results stored')
                    continue

                print('\nFetching results for SNP:', snp)
                # extract results using the html parser
                soup = BeautifulSoup(result_response.text, features = 'html.parser')
                param_dict = {"mutationinformation" : snp}
                for result_id in result_ids:
                    param_dict[result_id] = soup.find(id = result_id).get_text()

                results_df = pd.DataFrame([param_dict], columns = out_cols)
                print('Result DF:', results_df, 'for URL:', line)
                result_rows.append(param_dict)

    dynamut_results_out_df = pd.DataFrame(result_rows, columns = out_cols)

    #============================
    # Writing results file: csv
    #============================
    dynamut_results_dir = output_dir + '/dynamut_results'
    if not os.path.exists(dynamut_results_dir):
        print('\nCreating dir: dynamut_results within:', output_dir )
        os.makedirs(dynamut_results_dir)

    print('\nWriting dynamut results df')
    print('\nResults File:'
          , '\nNo. of rows:', dynamut_results_out_df.shape[0]
          , '\nNo. of cols:', dynamut_results_out_df.shape[1])
    print(dynamut_results_out_df)

    # build out filename
    out_filename = dynamut_results_dir + '/dynamut_output_' + outfile_suffix + '.csv'
    dynamut_results_out_df.to_csv(out_filename, index = False)
|
|
||||||
#%% EXAMPLE RUN
|
|
||||||
# globals
|
|
||||||
#homedir = os.path.expanduser('~')
|
|
||||||
#my_host = 'http://biosig.unimelb.edu.au'
|
|
||||||
#my_outdir = homedir + '/git/LSHTM_analysis/dynamut'
|
|
||||||
|
|
||||||
#=============================================
|
|
||||||
# example 1: multiple urls in a single file
|
|
||||||
#=============================================
|
|
||||||
#my_url_file_multiple = homedir + '/git/LSHTM_analysis/dynamut/dynamut_temp/dynamut_result_url_batch_multiple.txt'
|
|
||||||
#print(my_url_file_multiple)
|
|
||||||
#get_results(url_file = my_url_file_multiple
|
|
||||||
# , host_url = my_host
|
|
||||||
# , output_dir = my_outdir
|
|
||||||
# , outfile_suffix='multiple')
|
|
||||||
|
|
||||||
#=============================================
|
|
||||||
# example 2: single url in a file
|
|
||||||
#=============================================
|
|
||||||
#my_url_file_single = homedir + '/git/LSHTM_analysis/dynamut/dynamut_temp/dynamut_result_url_batch_single.txt'
|
|
||||||
#print(my_url_file_multiple)
|
|
||||||
#get_results(my_url_file_single
|
|
||||||
# , host_url = my_host
|
|
||||||
# , output_dir = my_outdir
|
|
||||||
# , outfile_suffix = 'single')
|
|
||||||
#%%
|
|
|
@ -1,36 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Created on Wed Aug 19 14:33:51 2020
|
|
||||||
|
|
||||||
@author: tanu
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
#%% load packages
|
|
||||||
import os,sys
|
|
||||||
import subprocess
|
|
||||||
import argparse
|
|
||||||
import requests
|
|
||||||
import re
|
|
||||||
import time
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
from pandas.api.types import is_string_dtype
|
|
||||||
from pandas.api.types import is_numeric_dtype
|
|
||||||
#%%============================================================================
|
|
||||||
# read mutation file
|
|
||||||
|
|
||||||
all_muts = pd.read_csv("/home/tanu/git/Data/streptomycin/output/snp_batches/snps_batch_00", header=None)
|
|
||||||
#https://gist.github.com/jrivero/1085501
|
|
||||||
n = 20
|
|
||||||
chunks = [all_muts[i:i+n] for i in range(0,all_muts.shape[0],n)]
|
|
||||||
#%%
|
|
||||||
|
|
||||||
muts_list = all_muts[0].values.tolist()
|
|
||||||
|
|
||||||
host = 'http://biosig.unimelb.edu.au/dynamut'
|
|
||||||
mut_prediction = '/prediction'
|
|
||||||
|
|
||||||
submit_url = host + mut_prediction
|
|
38
dynamut/run_results.py
Normal file
38
dynamut/run_results.py
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
Created on Wed Aug 19 14:33:51 2020
|
||||||
|
|
||||||
|
@author: tanu
|
||||||
|
"""
|
||||||
|
#%% load packages
|
||||||
|
import os
|
||||||
|
homedir = os.path.expanduser('~')
|
||||||
|
os.chdir (homedir + '/git/LSHTM_analysis/dynamut')
|
||||||
|
from get_results import *
|
||||||
|
########################################################################
|
||||||
|
# variables
|
||||||
|
my_host = 'http://biosig.unimelb.edu.au'
|
||||||
|
# Needed if things try to block the 'requests' user agent
|
||||||
|
#headers = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"}
|
||||||
|
|
||||||
|
# TODO: add cmd line args
|
||||||
|
#gene = 'gid'
|
||||||
|
drug = 'streptomycin'
|
||||||
|
datadir = homedir + '/git/Data'
|
||||||
|
indir = datadir + '/' + drug + '/input'
|
||||||
|
outdir = datadir + '/' + drug + '/output'
|
||||||
|
|
||||||
|
my_url_file = outdir + '/dynamut_temp/dynamut_result_url_gid_b1.txt'
|
||||||
|
my_suffix = 'gid_b1'
|
||||||
|
#==========================
|
||||||
|
# CALL: get_results()
|
||||||
|
# Data: gid+streptomycin
|
||||||
|
#==========================
|
||||||
|
print(my_url_file, 'suffix:', my_suffix)
|
||||||
|
|
||||||
|
get_results(url_file = my_url_file
|
||||||
|
, host_url = my_host
|
||||||
|
, output_dir = outdir
|
||||||
|
, outfile_suffix = my_suffix)
|
||||||
|
########################################################################
|
58
dynamut/run_submit.py
Normal file
58
dynamut/run_submit.py
Normal file
|
@ -0,0 +1,58 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
Created on Wed Aug 19 14:33:51 2020
|
||||||
|
|
||||||
|
@author: tanu
|
||||||
|
"""
|
||||||
|
#%% load packages
|
||||||
|
import os
|
||||||
|
homedir = os.path.expanduser('~')
|
||||||
|
os.chdir (homedir + '/git/LSHTM_analysis/dynamut')
|
||||||
|
from submit import *
|
||||||
|
########################################################################
|
||||||
|
# variables
|
||||||
|
my_host = 'http://biosig.unimelb.edu.au'
|
||||||
|
my_prediction_url = f"{my_host}/dynamut/prediction_list"
|
||||||
|
print(my_prediction_url)
|
||||||
|
|
||||||
|
# TODO: add cmd line args
|
||||||
|
#gene = 'gid'
|
||||||
|
drug = 'streptomycin'
|
||||||
|
datadir = homedir + '/git/Data'
|
||||||
|
indir = datadir + '/' + drug + '/input'
|
||||||
|
outdir = datadir + '/' + drug + '/output'
|
||||||
|
|
||||||
|
my_chain = 'A'
|
||||||
|
my_email = 'tanushree.tunstall@lshtm.ac.uk'
|
||||||
|
|
||||||
|
my_pdb_file = homedir + '/git/Data/streptomycin/input/gid_complex.pdb'
|
||||||
|
|
||||||
|
# batch 1: 00.txt
|
||||||
|
#my_mutation_list =outdir + '/snp_batches/50/snp_batch_00.txt'
|
||||||
|
#my_suffix = 'gid_b1'
|
||||||
|
#RAN: 11 Feb, ~14:00 pm * RETRIEVED
|
||||||
|
|
||||||
|
# batch 2: 01.txt
|
||||||
|
#my_mutation_list = outdir + '/snp_batches/50/snp_batch_01.txt'
|
||||||
|
#my_suffix = 'gid_b2'
|
||||||
|
#RAN: 12 Feb, ~10:00 am, AWAITING
|
||||||
|
|
||||||
|
# batch 3: 02.txt
|
||||||
|
my_mutation_list = outdir + '/snp_batches/50/snp_batch_02.txt'
|
||||||
|
my_suffix = 'gid_b3'
|
||||||
|
#RAN: 12 Feb, ~12:40 pm, AWAITING
|
||||||
|
|
||||||
|
#==========================
|
||||||
|
# CALL: submit_dynamut()
|
||||||
|
# Data: gid+streptomycin
|
||||||
|
#==========================
|
||||||
|
submit_dynamut(host_url = my_host
|
||||||
|
, pdb_file = my_pdb_file
|
||||||
|
, mutation_list = my_mutation_list
|
||||||
|
, chain = my_chain
|
||||||
|
, email_address = my_email
|
||||||
|
, prediction_url = my_prediction_url
|
||||||
|
, output_dir = outdir
|
||||||
|
, outfile_suffix = my_suffix)
|
||||||
|
#%%#####################################################################
|
104
dynamut/submit.py
Executable file → Normal file
104
dynamut/submit.py
Executable file → Normal file
|
@ -16,68 +16,21 @@ from bs4 import BeautifulSoup
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from pandas.api.types import is_string_dtype
|
from pandas.api.types import is_string_dtype
|
||||||
from pandas.api.types import is_numeric_dtype
|
from pandas.api.types import is_numeric_dtype
|
||||||
#%% homedir
|
#%%#####################################################################
|
||||||
homedir = os.path.expanduser('~')
|
def submit_dynamut(host_url
|
||||||
print('My homedir is:', homedir)
|
, pdb_file
|
||||||
|
, mutation_list
|
||||||
#%%
|
|
||||||
host = 'http://biosig.unimelb.edu.au'
|
|
||||||
prediction_url = f"{host}/dynamut/prediction_list"
|
|
||||||
print(prediction_url)
|
|
||||||
|
|
||||||
#%% example params
|
|
||||||
gene_name = 'gid'
|
|
||||||
drug = 'streptomycin'
|
|
||||||
datadir = homedir + '/git/Data'
|
|
||||||
indir = datadir + '/' + drug + '/input'
|
|
||||||
#outdir = datadir + '/' + drug + '/output'
|
|
||||||
outdir = homedir + '/git/LSHTM_analysis/dynamut' # for example
|
|
||||||
|
|
||||||
dynamut_temp_dir = outdir + '/dynamut_temp'
|
|
||||||
|
|
||||||
if not os.path.exists(dynamut_temp_dir):
|
|
||||||
print('Creating dynamut_temp in outdir', outdir )
|
|
||||||
os.makedirs(dynamut_temp_dir)
|
|
||||||
|
|
||||||
batch_no = 1
|
|
||||||
out_url_file = dynamut_temp_dir + '/dynamut_result_url_batch_' + str(batch_no) + '.txt'
|
|
||||||
|
|
||||||
|
|
||||||
#%% request calculation (no def)
|
|
||||||
with open("/home/tanu/git/Data/streptomycin/input/gid_complex.pdb", "rb") as pdb_file, open ("/home/tanu/git/LSHTM_analysis/dynamut/snp_test2.csv", "rb") as mutation_list:
|
|
||||||
files = {"wild": pdb_file
|
|
||||||
, "mutation_list": mutation_list}
|
|
||||||
body = {"chain": 'A'
|
|
||||||
, "email": 'tanushree.tunstall@lshtm.ac.uk'}
|
|
||||||
|
|
||||||
response = requests.post(prediction_url, files = files, data = body)
|
|
||||||
print(response.status_code)
|
|
||||||
if response.history:
|
|
||||||
print('PASS: valid mutation submitted. Fetching result url')
|
|
||||||
url_match = re.search('/dynamut/results_prediction/.+(?=")', response.text)
|
|
||||||
url = host + url_match.group()
|
|
||||||
print(url)
|
|
||||||
|
|
||||||
#===============
|
|
||||||
# writing file: result urls
|
|
||||||
#===============
|
|
||||||
out_url_file = dynamut_temp_dir + '/dynamut_result_url_batch_' + str(batch_no) + '.txt'
|
|
||||||
print('Writing output url file:', out_url_file)
|
|
||||||
myfile = open(out_url_file, 'a')
|
|
||||||
myfile.write(url)
|
|
||||||
myfile.close()
|
|
||||||
|
|
||||||
#%%
|
|
||||||
def request_calculation(pdb_file, mutation_list
|
|
||||||
, chain
|
, chain
|
||||||
, my_email
|
, email_address
|
||||||
, prediction_url
|
, prediction_url
|
||||||
, output_dir
|
, output_dir
|
||||||
, gene_name
|
, outfile_suffix
|
||||||
, batch_no
|
):
|
||||||
, out_url_file):
|
|
||||||
"""
|
"""
|
||||||
Makes a POST request for a ligand affinity prediction.
|
Makes a POST request for dynamut predictions.
|
||||||
|
|
||||||
|
@param host_url: valid host url for submitting the job
|
||||||
|
@type string
|
||||||
|
|
||||||
@param pdb_file: valid path to pdb structure
|
@param pdb_file: valid path to pdb structure
|
||||||
@type string
|
@type string
|
||||||
|
@ -91,31 +44,44 @@ def request_calculation(pdb_file, mutation_list
|
||||||
@param prediction_url: dynamut url for prediction
|
@param prediction_url: dynamut url for prediction
|
||||||
@type string
|
@type string
|
||||||
|
|
||||||
@return txt file containing batch no. of snps processed
|
@param output_dir: output dir
|
||||||
|
@type string
|
||||||
|
|
||||||
|
@param outfile_suffix: outfile_suffix
|
||||||
|
@type string, default is batch no.
|
||||||
|
|
||||||
|
@param outfile_suffix: to append to outfile
|
||||||
|
@type string
|
||||||
|
|
||||||
|
@return writes a .txt file containing url for the snps processed with user provided suffix in filename
|
||||||
@type string
|
@type string
|
||||||
"""
|
"""
|
||||||
|
|
||||||
with open(pdb_file, "rb") as pdb_file, open (mutation_list) as mutation_list:
|
with open(pdb_file, "rb") as pdb_file, open (mutation_list, "rb") as mutation_list:
|
||||||
files = {"wild": pdb_file
|
files = {"wild": pdb_file
|
||||||
, "mutation_list": mutation_list}
|
, "mutation_list": mutation_list}
|
||||||
body = {"chain": 'A'
|
body = {"chain": chain
|
||||||
, "email": 'tanushree.tunstall@lshtm.ac.uk'}
|
, "email": email_address}
|
||||||
|
|
||||||
response = requests.post(prediction_url, files = files, data = body)
|
response = requests.post(prediction_url, files = files, data = body)
|
||||||
print(response.status_code)
|
print(response.status_code)
|
||||||
if response.history:
|
if response.history:
|
||||||
print('PASS: valid mutation submitted. Fetching result url')
|
print('\nPASS: valid submission. Fetching result url')
|
||||||
url_match = re.search('/dynamut/results_prediction/.+(?=")', response.text)
|
url_match = re.search('/dynamut/results_prediction/.+(?=")', response.text)
|
||||||
url = host + url_match.group()
|
url = host_url + url_match.group()
|
||||||
print(url)
|
print('\nURL for snp batch no ', str(outfile_suffix), ':', url)
|
||||||
|
|
||||||
#===============
|
#===============
|
||||||
# writing file: result urls
|
# writing file: result urls
|
||||||
#===============
|
#===============
|
||||||
out_url_file = dynamut_temp_dir + '/dynamut_result_url_batch_' + str(batch_no) + '.txt'
|
dynamut_temp_dir = output_dir + '/dynamut_temp' # creates a temp dir within output_dir
|
||||||
print('Writing output url file:', out_url_file)
|
if not os.path.exists(dynamut_temp_dir):
|
||||||
|
print('\nCreating dynamut_temp in output_dir', output_dir )
|
||||||
|
os.makedirs(dynamut_temp_dir)
|
||||||
|
|
||||||
|
out_url_file = dynamut_temp_dir + '/dynamut_result_url_' + str(outfile_suffix) + '.txt'
|
||||||
|
print('\nWriting output url file:', out_url_file)
|
||||||
myfile = open(out_url_file, 'a')
|
myfile = open(out_url_file, 'a')
|
||||||
myfile.write(url)
|
myfile.write(url)
|
||||||
myfile.close()
|
myfile.close()
|
||||||
#====================
|
#%%#####################################################################
|
||||||
# Submit first batch
|
|
||||||
|
|
|
@ -1,121 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Created on Wed Aug 19 14:33:51 2020
|
|
||||||
|
|
||||||
@author: tanu
|
|
||||||
"""
|
|
||||||
#%% load packages
|
|
||||||
import os,sys
|
|
||||||
import subprocess
|
|
||||||
import argparse
|
|
||||||
import requests
|
|
||||||
import re
|
|
||||||
import time
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
import pandas as pd
|
|
||||||
from pandas.api.types import is_string_dtype
|
|
||||||
from pandas.api.types import is_numeric_dtype
|
|
||||||
#%% homedir
|
|
||||||
# Home directory of the current user; the data paths further down are built from it.
homedir = os.path.expanduser('~')
print('My homedir is:', homedir)
|
|
||||||
#%%
|
|
||||||
def request_calculation(pdb_file
                        , mutation_list
                        , batch_no
                        , chain
                        , my_email
                        , prediction_url
                        , output_dir
                        ):
    """
    Makes a POST request for dynamut predictions and appends the result url
    to a per-batch text file inside <output_dir>/dynamut_temp.

    @param pdb_file: valid path to pdb structure
    @type string

    @param mutation_list: path to a file listing mutations (1 per line) of the format: {WT}<POS>{Mut}
    @type string

    @param batch_no: batch no so it can be added as a suffix to the outfile
    @type int

    @param chain: single-letter(caps)
    @type chr

    @param my_email: email address sent along with the submission
    @type string

    @param prediction_url: dynamut url for prediction
    @type string

    @param output_dir: output dir; the dynamut_temp dir is created inside it if missing
    @type string

    @return path of the txt file the result url was appended to (i.e out_url_file),
            or None when the server did not redirect to a results page
    @type string

    @raise ValueError: the submission was redirected but no result url could be
            found in the returned page
    """
    # Function-scope import: keeps this block independent of the file's import list.
    from urllib.parse import urlsplit

    # Distinct handle names so the path parameters are not rebound to file objects.
    with open(pdb_file, "rb") as pdb_handle, open(mutation_list, "rb") as muts_handle:
        files = {"wild": pdb_handle
                 , "mutation_list": muts_handle}
        body = {"chain": chain
                , "email": my_email}

        response = requests.post(prediction_url, files = files, data = body)

    print(response.status_code)

    # A non-empty redirect history is what this script treats as an accepted job.
    if not response.history:
        print('\nFAIL: submission was not redirected to a results page, no url written')
        return None

    print('\nPASS: valid submission. Fetching result url')
    url_match = re.search('/dynamut/results_prediction/.+(?=")', response.text)
    if url_match is None:
        raise ValueError('No dynamut result url found in response for snp batch no '
                         + str(batch_no))

    # Build the host from prediction_url instead of relying on a module-level
    # 'host' global that is only defined after this function.
    url_parts = urlsplit(prediction_url)
    host_url = url_parts.scheme + '://' + url_parts.netloc
    url = host_url + url_match.group()
    print('\nURL for snp batch no ', str(batch_no), ':', url)

    #===============
    # writing file: result urls
    #===============
    # Use the output_dir argument (the original read the global 'outdir').
    dynamut_temp_dir = output_dir + '/dynamut_temp'
    if not os.path.exists(dynamut_temp_dir):
        print('\nCreating dynamut_temp in outdir', output_dir )
        os.makedirs(dynamut_temp_dir, exist_ok = True)

    out_url_file = dynamut_temp_dir + '/dynamut_result_url_batch_' + str(batch_no) + '.txt'
    print('\nWriting output url file:', out_url_file)
    # Append mode is kept from the original; 'with' closes the file even if the write fails.
    with open(out_url_file, 'a') as myfile:
        myfile.write(url)

    return out_url_file
|
|
||||||
#%% run settings
host = 'http://biosig.unimelb.edu.au'

drug = 'streptomycin'
datadir = homedir + '/git/Data'
indir = datadir + '/' + drug + '/input'
outdir = datadir + '/' + drug + '/output'

my_chain = 'A'
# Built from indir so the structure path cannot drift from datadir/drug
# (same value as the previously hard-coded path).
my_pdb_file = indir + '/gid_complex.pdb'

# snp_batch_00.txt was submitted as batch 1; this run submits snp_batch_01.txt as batch 2.
my_mutation_list = outdir + '/snp_batches/50/snp_batch_01.txt'
my_batch = 2

#%% call this function
request_calculation(pdb_file = my_pdb_file
                    , mutation_list = my_mutation_list
                    , chain = my_chain
                    , my_email = 'tanushree.tunstall@lshtm.ac.uk'
                    , prediction_url = f"{host}/dynamut/prediction_list"
                    , output_dir = outdir
                    , batch_no = my_batch)
|
|
Loading…
Add table
Add a link
Reference in a new issue