add wrapper and mcsm library
This commit is contained in:
parent
23c2ddf45f
commit
bc03aab82d
6 changed files with 558 additions and 678 deletions
145
mcsm/mcsm_wrapper.py
Executable file
145
mcsm/mcsm_wrapper.py
Executable file
|
@ -0,0 +1,145 @@
|
|||
#!/usr/bin/env python3
|
||||
# mCSM Wrapper
|
||||
import os,sys
|
||||
import subprocess
|
||||
import argparse
|
||||
import pandas as pd
|
||||
|
||||
from mcsm import *
|
||||
|
||||
#%% command line args
# Command-line interface: the drug and gene to process (both mandatory),
# the pipeline stage to run, and the mCSM server endpoints to use.
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument(
    '-d', '--drug',
    required=True,
    help='drug name',
)
arg_parser.add_argument(
    '-g', '--gene',
    required=True,
    help='gene name (case sensitive)',
)
arg_parser.add_argument(
    '-s', '--stage',
    choices=['submit', 'get', 'format'],
    default='get',
    help='mCSM Pipeline Stage',
)
arg_parser.add_argument(
    '-H', '--host',
    default='http://biosig.unimelb.edu.au',
    help='mCSM Server',
)
arg_parser.add_argument(
    '-U', '--url',
    default='http://biosig.unimelb.edu.au/mcsm_lig/prediction',
    help='mCSM Server URL',
)

args = arg_parser.parse_args()

# Expose the parsed options as module-level globals read by the stage
# functions below (example values once hard-coded here: drug 'isoniazid',
# gene 'KatG').
gene, drug, stage = args.gene, args.drug, args.stage
host, prediction_url = args.host, args.url
|
||||
|
||||
# submit_mcsm globals
homedir = os.path.expanduser('~')

# Switch the working directory to the mcsm folder of the checkout under the
# user's home; os.chdir raises if that directory does not exist.
os.chdir(f'{homedir}/git/LSHTM_analysis/mcsm')

# NOTE(review): gene_match is not referenced by the functions visible in
# this file -- confirm whether it is still needed.
gene_match = f'{gene}_p.'

# Data layout: <home>/git/Data/<drug>/{input,output}
datadir = f'{homedir}/git/Data'
indir = f'{datadir}/{drug}/input'
outdir = f'{datadir}/{drug}/output'

# PDB complex, looked up in the input directory.
in_filename_pdb = f'{gene.lower()}_complex.pdb'
infile_pdb = f'{indir}/{in_filename_pdb}'

# SNP list (outfile2, from data_extraction.py); note it is read from the
# *output* directory. A '_mcsm_snps_test.csv' variant was used for testing.
in_filename_snps = f'{gene.lower()}_mcsm_snps.csv'
infile_snps = f'{outdir}/{in_filename_snps}'

# File holding one result URL per line.
result_urls_filename = f'{gene.lower()}_result_urls.txt'
result_urls = f'{outdir}/{result_urls_filename}'

# mcsm_results globals
print('infile:', result_urls)
mcsm_output_filename = f'{gene.lower()}_mcsm_output.csv'
mcsm_output = f'{outdir}/{mcsm_output_filename}'

# format_results globals
print('infile:', mcsm_output)
out_filename_format = f'{gene.lower()}_mcsm_processed.csv'
outfile_format = f'{outdir}/{out_filename_format}'
|
||||
#%%=====================================================================
|
||||
def submit_mcsm(my_chain='A', my_ligand_id='DCS', my_affinity=10):
    """Submit every mutation in the SNP input file to the mCSM server.

    Reads the module-level paths and server settings (``infile_snps``,
    ``infile_pdb``, ``prediction_url``, ``outdir``, ``gene``, ``host``) and
    calls ``request_calculation`` once per mutation, pausing one second
    between requests.

    Args:
        my_chain: chain identifier passed to ``request_calculation``.
        my_ligand_id: ligand identifier passed to ``request_calculation``.
            The default 'DCS' was hard-coded (and marked FIXME) originally.
        my_affinity: affinity value passed to ``request_calculation``.
    """
    # Imported locally: the file's import block does not import `time`, so
    # relying on it leaking in through `from mcsm import *` is fragile.
    import time

    print('Result urls and error file (if any) will be written in: ', outdir)

    # call function to format data to remove duplicate snps before submitting job
    mcsm_muts = format_data(infile_snps)

    # Count lines in Python rather than shelling out to `wc -l`: no shell
    # quoting problems with unusual paths and no trailing newline in the
    # printed count.
    with open(infile_snps, 'r') as snps_file:
        infile_snps_len = sum(1 for _ in snps_file)
    print('Total SNPs for', gene, ':', infile_snps_len)

    for mut_count, mcsm_mut in enumerate(mcsm_muts, start=1):
        print('Processing mutation: %s of %s' % (mut_count, infile_snps_len), mcsm_mut)
        print('Parameters for mcsm_lig:', in_filename_pdb, mcsm_mut, my_chain,
              my_ligand_id, my_affinity, prediction_url, outdir, gene)
        # function call: to request mcsm prediction
        # (per the original author's note, it writes the url for valid
        # submissions and the invalid muts to respective files)
        request_calculation(infile_pdb, mcsm_mut, my_chain, my_ligand_id,
                            my_affinity, prediction_url, outdir, gene, host)
        # Throttle: one request per second.
        time.sleep(1)

    # NOTE(review): indentation was lost in the reviewed source; this summary
    # is assumed to be printed once, after all submissions -- confirm.
    print('Request submitted'
          , '\nCAUTION: Processing will take at least ten'
          , 'minutes, but will be longer for more mutations.')
|
||||
#%%=====================================================================
|
||||
def get_results():
    """Scrape every result URL and write the combined results to CSV.

    Reads one URL per line from the module-level ``result_urls`` file,
    passes each through ``scrape_results`` and ``build_result_dict``, and
    writes all rows (without the index, with a header) to ``mcsm_output``.
    """
    # Load the URLs up front so the total is known without shelling out to
    # `wc -l`; blank lines are skipped rather than scraped.
    with open(result_urls, 'r') as urlfile:
        stripped_lines = [line.strip() for line in urlfile]
    url_lines = [url_line for url_line in stripped_lines if url_line]
    infile_len = len(url_lines)

    print('Total URLs:', infile_len)

    # Collect one single-row frame per URL and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0, and appending inside a
    # loop re-copies the accumulated frame on every iteration.
    frames = []
    for url_counter, url_line in enumerate(url_lines, start=1):
        # call functions
        results_interim = scrape_results(url_line)
        result_dict = build_result_dict(results_interim)

        print('Processing URL: %s of %s' % (url_counter, infile_len))
        frames.append(pd.DataFrame(result_dict, index=[url_counter]))

    # pd.concat raises on an empty list; keep the original behaviour of
    # writing an empty frame when there are no URLs.
    output_df = pd.concat(frames) if frames else pd.DataFrame()

    # NOTE(review): indentation was lost in the reviewed source; the CSV is
    # assumed to be written once, after all URLs are processed -- confirm.
    output_df.to_csv(mcsm_output, index=False, header=True)
|
||||
#%%=====================================================================
|
||||
def format_results():
    """Format the raw mCSM output CSV and write the processed CSV.

    Passes the module-level ``mcsm_output`` path to ``format_mcsm_output``
    and writes the returned frame to ``outfile_format`` without the index,
    then prints a summary of what was written.
    """
    print('Input file:', mcsm_output
          , '\n============================================================='
          , '\nOutput file:', outfile_format
          , '\n=============================================================')

    # call function
    mcsm_df_formatted = format_mcsm_output(mcsm_output)

    # writing file
    print('Writing formatted df to csv')
    mcsm_df_formatted.to_csv(outfile_format, index=False)

    # len(df) is the row count; the column count must come from df.columns
    # (the original printed the row count for both).
    print('Finished writing file:'
          , '\nFilename:', out_filename_format
          , '\nPath:', outdir
          , '\nExpected no. of rows:', len(mcsm_df_formatted)
          , '\nExpected no. of cols:', len(mcsm_df_formatted.columns)
          , '\n=============================================================')
|
||||
#%%=====================================================================
|
||||
def main():
    """Run the pipeline stage named by the module-level ``stage`` value.

    Prints a banner for the chosen stage and then calls the matching stage
    function; an unrecognised stage only prints an error message.
    """
    # stage name -> (banner to print, function to run)
    dispatch = {
        'submit': ('mCSM stage: submit mutations for mcsm analysis', submit_mcsm),
        'get': ('mCSM stage: get results', get_results),
        'format': ('mCSM stage: format results', format_results),
    }

    selected = dispatch.get(stage)
    if selected is None:
        print('ERROR: invalid stage')
        return

    banner, run_stage = selected
    print(banner)
    run_stage()


main()
|
Loading…
Add table
Add a link
Reference in a new issue