renamed mcsm_wrapper to run_mcsm
This commit is contained in:
parent
c0fa9e3904
commit
44597ec563
1 changed files with 0 additions and 0 deletions
172
mcsm/run_mcsm.py
Executable file
172
mcsm/run_mcsm.py
Executable file
|
@ -0,0 +1,172 @@
|
|||
#!/usr/bin/env python3
|
||||
# mCSM Wrapper
|
||||
import os,sys
|
||||
import subprocess
|
||||
import argparse
|
||||
import pandas as pd
|
||||
|
||||
from mcsm import *
|
||||
|
||||
#%% command line args
# Build the command-line interface. All option names, defaults and help
# strings are part of the script's public interface.
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('-d', '--drug', help='drug name', required=True)
arg_parser.add_argument('-g', '--gene', help='gene name (case sensitive)', required=True)  # case sensitive
# NOTE: because required=True, argparse never applies the 'get' default here;
# the stage must always be given explicitly.
arg_parser.add_argument('-s', '--stage', help='mCSM Pipeline Stage', default='get', choices=['submit', 'get', 'format'], required=True)
arg_parser.add_argument('-H', '--host', help='mCSM Server', default='http://biosig.unimelb.edu.au')
arg_parser.add_argument('-U', '--url', help='mCSM Server URL', default='http://biosig.unimelb.edu.au/mcsm_lig/prediction')
arg_parser.add_argument('-c', '--chain', help='Chain ID as per PDB, Case sensitive', default='A')
arg_parser.add_argument('-l', '--ligand', help='Ligand ID as per PDB, Case sensitive. REQUIRED only in "submit" stage')
# NOTE: no type= is given, so a value supplied on the command line arrives as
# a string while the default is a float.
arg_parser.add_argument('-a', '--affinity', help='Affinity in nM', default=0.99)
arg_parser.add_argument('-pdb', '--pdb_file', help='PDB File')
# The long option is spelled '--datadir', so argparse stores it as
# args.datadir (not args.data_dir).
arg_parser.add_argument('--datadir', help='Data Directory')
arg_parser.add_argument('--debug', action='store_true', help='Debug Mode')

args = arg_parser.parse_args()

# Unpack the parsed options into module-level names used by the stage
# functions below.
gene = args.gene
drug = args.drug
stage = args.stage
chain = args.chain
ligand = args.ligand
affinity = args.affinity
pdb_filename = args.pdb_file
# Fixed: this previously read args.data_dir, which does not exist on the
# namespace and raised AttributeError on every run.
data_dir = args.datadir
DEBUG = args.debug

# Actual Globals :-)
host = args.host
prediction_url = args.url
|
||||
|
||||
#host = "http://biosig.unimelb.edu.au"
|
||||
#prediction_url = f"{host}/mcsm_lig/prediction"
|
||||
#drug = 'isoniazid'
|
||||
#gene = 'KatG'
|
||||
|
||||
# submit_mcsm globals
# Everything below derives input/output locations from the command-line
# values (gene, drug, data_dir, pdb_filename) parsed above.
homedir = os.path.expanduser('~')

#os.chdir(homedir + '/git/LSHTM_analysis/mcsm')
gene_match = f'{gene}_p.'

# File names are keyed on the lower-cased gene name.
_gene_lower = gene.lower()

# Use the user-supplied data directory when given, else ~/git/Data.
datadir = data_dir if data_dir else f'{homedir}/git/Data'

indir = f'{datadir}/{drug}/input'
outdir = f'{datadir}/{drug}/output'

# Use the user-supplied PDB file name when given, else <gene>_complex.pdb.
in_filename_pdb = pdb_filename if pdb_filename else f'{_gene_lower}_complex.pdb'
infile_pdb = f'{indir}/{in_filename_pdb}'

in_filename_snps = f'{_gene_lower}_mcsm_snps.csv'  # (outfile_mcsm_snps, from data_extraction.py)
infile_snps = f'{outdir}/{in_filename_snps}'

# mcsm_results globals
result_urls_filename = f'{_gene_lower}_result_urls.txt'
result_urls = f'{outdir}/{result_urls_filename}'

mcsm_output_filename = f'{_gene_lower}_mcsm_output.csv'
mcsm_output = f'{outdir}/{mcsm_output_filename}'

# format_results globals
#out_filename_format = gene.lower() + '_mcsm_processed.csv'
out_filename_format = f'{_gene_lower}_complex_mcsm_norm.csv'
outfile_format = f'{outdir}/{out_filename_format}'

# In debug mode, report the three output locations in the order they are
# defined above.
if DEBUG:
    print('DEBUG: Result URLs:', result_urls)
    print('DEBUG: mCSM output CSV file:', mcsm_output)
    print('DEBUG: formatted CSV output:', outfile_format)
|
||||
#%%=====================================================================
|
||||
def submit_mcsm():
    """Submit each mutation for the gene to the mCSM-lig prediction server.

    Reads the mutation list from ``infile_snps`` through ``format_data``
    (which, per the original comment, removes duplicate SNPs) and calls
    ``request_calculation`` once per mutation, pausing one second between
    requests.

    All inputs come from module-level names set from the command line
    (``chain``, ``ligand``, ``affinity``, ``prediction_url``, ``host``, ...).
    Example values: chain = 'A', ligand = 'RMP', affinity = 10.

    Returns:
        None.
    """
    # Imported locally so this function does not depend on ``time`` leaking
    # into the namespace through ``from mcsm import *``.
    import time

    print('Result urls and error file (if any) will be written in: ', outdir)

    # call function to format data to remove duplicate snps before submitting job
    mcsm_muts = format_data(infile_snps)

    # Count the lines of the SNP file in Python rather than shelling out to
    # ``wc -l``: no shell quoting problems with unusual paths, portable, and
    # yields an int instead of a string carrying a trailing newline.
    with open(infile_snps, 'r') as snp_file:
        infile_snps_len = sum(1 for _ in snp_file)
    print('Total SNPs for', gene, ':', infile_snps_len)

    for mut_count, mcsm_mut in enumerate(mcsm_muts, start=1):
        print('Processing mutation: %s of %s' % (mut_count, infile_snps_len), mcsm_mut)
        if DEBUG:
            # Print exactly the arguments handed to request_calculation below.
            print('DEBUG: Parameters for mcsm_lig:', infile_pdb, mcsm_mut, chain, ligand, affinity, prediction_url, outdir, gene, host)
        # function call: to request mcsm prediction
        # which writes file containing url for valid submissions and invalid muts to respective files
        # (the returned holding page is not used here)
        request_calculation(infile_pdb, mcsm_mut, chain, ligand, affinity, prediction_url, outdir, gene, host)
        # Pause between submissions (presumably to avoid hammering the server).
        time.sleep(1)

    # Printed once, after all mutations have been submitted.
    print('Request submitted'
          , '\nCAUTION: Processing will take at least ten'
          , 'minutes, but will be longer for more mutations.')
|
||||
#%%=====================================================================
|
||||
def get_results():
    """Scrape every result URL and write the combined results to CSV.

    Reads one URL per line from ``result_urls``, passes each (stripped) line
    to ``scrape_results`` and, when that returns something other than
    ``None``, converts it with ``build_result_dict`` into a one-row
    DataFrame indexed by the 1-based position of the URL in the file.
    The rows are concatenated and written to ``mcsm_output`` without the
    index column.

    Returns:
        None.
    """
    # Read the URL list once; its length replaces the former ``wc -l`` shell
    # call (portable, no shell quoting issues, gives an int).
    with open(result_urls, 'r') as urlfile:
        urls = [line.strip() for line in urlfile]
    infile_len = len(urls)

    print('Total URLs:', infile_len)

    # Collect per-URL frames and concatenate once at the end.
    # DataFrame.append was removed in pandas 2.0, and appending in a loop
    # re-copies the accumulated frame on every iteration.
    frames = []
    for url_counter, url_line in enumerate(urls, start=1):
        # call functions
        results_interim = scrape_results(url_line)
        if results_interim is None:
            # Nothing scraped for this URL: counted as failed below.
            continue
        print('Processing URL: %s of %s' % (url_counter, infile_len))
        result_dict = build_result_dict(results_interim)
        frames.append(pd.DataFrame(result_dict, index=[url_counter]))

    success_count = len(frames)
    print('Total URLs: %s Successful: %s Failed: %s' % (infile_len, success_count, infile_len - success_count))

    # pd.concat raises on an empty list, so fall back to an empty frame to
    # keep writing an (empty) output file when nothing succeeded.
    output_df = pd.concat(frames) if frames else pd.DataFrame()
    output_df.to_csv(mcsm_output, index=False, header=True)
|
||||
#%%=====================================================================
|
||||
def format_results():
    """Format the raw mCSM output CSV and write the result to disk.

    Passes ``mcsm_output`` to ``format_mcsm_output`` and writes the object
    it returns to ``outfile_format`` as CSV without the index column,
    reporting the file names and the resulting row/column counts.

    Returns:
        None.
    """
    print(
        'Input file:', mcsm_output,
        '\n=============================================================',
        '\nOutput file:', outfile_format,
        '\n=============================================================',
    )

    # call function
    formatted = format_mcsm_output(mcsm_output)

    # writing file
    print('Writing formatted df to csv')
    formatted.to_csv(outfile_format, index=False)

    # Dimensions are read after the write, as in the summary below.
    n_rows = len(formatted)
    n_cols = len(formatted.columns)
    print(
        'Finished writing file:',
        '\nFile:', outfile_format,
        '\nExpected no. of rows:', n_rows,
        '\nExpected no. of cols:', n_cols,
        '\n=============================================================',
    )
|
||||
#%%=====================================================================
|
||||
def main():
    """Run the pipeline step selected by the module-level ``stage`` value.

    'submit' calls submit_mcsm(), 'get' calls get_results() and 'format'
    calls format_results(); any other value only prints an error message.

    Returns:
        None.
    """
    if stage == 'submit':
        print('mCSM stage: submit mutations for mcsm analysis')
        submit_mcsm()
        return

    if stage == 'get':
        print('mCSM stage: get results')
        get_results()
        return

    if stage == 'format':
        print('mCSM stage: format results')
        format_results()
        return

    # Fallback for any value that is not one of the three stages above.
    print('ERROR: invalid stage')


main()
|
Loading…
Add table
Add a link
Reference in a new issue