various debug, doc, and args
This commit is contained in:
parent
f6fc6e47ab
commit
e2f319ba42
4 changed files with 77 additions and 40 deletions
|
@ -10,7 +10,7 @@ Requires an additional 'Data' directory. Batteries not included:-)
|
|||
## Assumptions
|
||||
|
||||
1. git repos are cloned to `~/git`
|
||||
2. Requires a `Data/` directory in `~/git` which has the structure created by `mk_drug_dirs.sh`
|
||||
2. Requires a data directory with `input` and `output` subdirectories. It can be specified on the CLI with `--datadir`, and can optionally be created with `mk_drug_dirs.sh <DRUG_NAME>`
|
||||
|
||||
## LSHTM\_analysis:
|
||||
|
||||
|
|
30
mcsm/mcsm.py
30
mcsm/mcsm.py
|
@ -135,7 +135,7 @@ def scrape_results(result_url):
|
|||
else:
|
||||
return web_result_raw
|
||||
else:
|
||||
print('FAIL: Could not fetch results'
|
||||
sys.exit('FAIL: Could not fetch results'
|
||||
, '\nCheck if url is valid')
|
||||
|
||||
|
||||
|
@ -234,7 +234,7 @@ def format_mcsm_output(mcsm_outputcsv):
|
|||
, '\nDim of data:', mcsm_data.shape
|
||||
, '\n===============================================================')
|
||||
else:
|
||||
print('FAIL (but not fatal): Duplicate mutations detected'
|
||||
print('WARNING: Duplicate mutations detected'
|
||||
, '\nDim of df with duplicates:', mcsm_data.shape
|
||||
, 'Removing duplicate entries')
|
||||
mcsm_data = mcsm_data.drop_duplicates(['mutation_information'])
|
||||
|
@ -252,14 +252,14 @@ def format_mcsm_output(mcsm_outputcsv):
|
|||
DUET_pos = c.get(key = 'duet_stability_change')
|
||||
# Assign category based on sign (+ve: Stabilising, -ve: Destabilising). Mind the spelling (British spelling).
|
||||
mcsm_data['duet_outcome'] = np.where(mcsm_data['duet_stability_change']>=0, 'Stabilising', 'Destabilising')
|
||||
mcsm_data['duet_outcome'].value_counts()
|
||||
if DUET_pos == mcsm_data['duet_outcome'].value_counts()['Stabilising']:
|
||||
print('PASS: DUET outcome assigned correctly')
|
||||
else:
|
||||
print('FAIL: DUET outcome assigned incorrectly'
|
||||
, '\nExpected no. of stabilising mutations:', DUET_pos
|
||||
, '\nGot no. of stabilising mutations', mcsm_data['duet_outcome'].value_counts()['Stabilising']
|
||||
, '\n===============================================================')
|
||||
print('DUET Outcome:', mcsm_data['duet_outcome'].value_counts())
|
||||
#if DUET_pos == mcsm_data['duet_outcome'].value_counts()['Stabilising']:
|
||||
# print('PASS: DUET outcome assigned correctly')
|
||||
#else:
|
||||
# print('FAIL: DUET outcome assigned incorrectly'
|
||||
# , '\nExpected no. of stabilising mutations:', DUET_pos
|
||||
# , '\nGot no. of stabilising mutations', mcsm_data['duet_outcome'].value_counts()['Stabilising']
|
||||
# , '\n===============================================================')
|
||||
#%%===========================================================================
|
||||
#############
|
||||
# Extract numeric
|
||||
|
@ -270,7 +270,7 @@ def format_mcsm_output(mcsm_outputcsv):
|
|||
mcsm_data['ligand_distance']
|
||||
print('extracting numeric part of col: ligand_distance')
|
||||
mcsm_data['ligand_distance'] = mcsm_data['ligand_distance'].str.extract('(\d+\.?\d*)')
|
||||
mcsm_data['ligand_distance']
|
||||
print('Ligand Distance:',mcsm_data['ligand_distance'])
|
||||
#%%===========================================================================
|
||||
#############
|
||||
# Create 2 columns:
|
||||
|
@ -310,7 +310,7 @@ def format_mcsm_output(mcsm_outputcsv):
|
|||
, '\nNo. of predicted affinity changes:\n', british_spl
|
||||
, '\n===============================================================')
|
||||
else:
|
||||
print('FAIL: spelling change unsucessfull'
|
||||
sys.exit('FAIL: spelling change unsucessfull'
|
||||
, '\nExpected:\n', american_spl
|
||||
, '\nGot:\n', british_spl
|
||||
, '\n===============================================================')
|
||||
|
@ -338,7 +338,7 @@ def format_mcsm_output(mcsm_outputcsv):
|
|||
, '\nchanged to numeric'
|
||||
, '\n===============================================================')
|
||||
else:
|
||||
print('FAIL:dtype change to numeric for selected cols unsuccessful'
|
||||
sys.exit('FAIL:dtype change to numeric for selected cols unsuccessful'
|
||||
, '\n===============================================================')
|
||||
print(mcsm_data.dtypes)
|
||||
#%%===========================================================================
|
||||
|
@ -403,7 +403,7 @@ def format_mcsm_output(mcsm_outputcsv):
|
|||
print('PASS: dtypes for char cols:', char_cols, 'are indeed string'
|
||||
, '\n===============================================================')
|
||||
else:
|
||||
print('FAIL:dtype change to numeric for selected cols unsuccessful'
|
||||
sys.exit('FAIL:dtype change to numeric for selected cols unsuccessful'
|
||||
, '\n===============================================================')
|
||||
#mcsm_data['ligand_distance', 'ligand_affinity_change'].apply(is_numeric_dtype(mcsm_data['ligand_distance', 'ligand_affinity_change']))
|
||||
print(mcsm_data.dtypes)
|
||||
|
@ -430,7 +430,7 @@ def format_mcsm_output(mcsm_outputcsv):
|
|||
, '\nformatted df shape:', mcsm_dataf.shape
|
||||
, '\n===============================================================')
|
||||
else:
|
||||
print('FAIL: something went wrong in formatting df'
|
||||
sys.exit('FAIL: something went wrong in formatting df'
|
||||
, '\nLen of orig df:', dforig_len
|
||||
, '\nExpected number of cols to add:', expected_ncols_toadd
|
||||
, '\nExpected no. of cols:', expected_cols, '(', dforig_len, '+', expected_ncols_toadd, ')'
|
||||
|
|
|
@ -9,23 +9,34 @@ from mcsm import *
|
|||
|
||||
#%% command line args
|
||||
arg_parser = argparse.ArgumentParser()
|
||||
arg_parser.add_argument('-d', '--drug',required=True, help='drug name')
|
||||
arg_parser.add_argument('-d', '--drug', help='drug name' , required=True)
|
||||
arg_parser.add_argument('-g', '--gene', help='gene name (case sensitive)', required=True) # case sensitive
|
||||
arg_parser.add_argument('-s', '--stage', help='mCSM Pipeline Stage', default = 'get', choices=['submit', 'get', 'format'])
|
||||
arg_parser.add_argument('-s', '--stage', help='mCSM Pipeline Stage', default = 'get', choices=['submit', 'get', 'format'], required=True)
|
||||
arg_parser.add_argument('-H', '--host', help='mCSM Server', default = 'http://biosig.unimelb.edu.au')
|
||||
arg_parser.add_argument('-U', '--url', help='mCSM Server URL', default = 'http://biosig.unimelb.edu.au/mcsm_lig/prediction')
|
||||
arg_parser.add_argument('-c', '--chain', help='Chain ID as per PDB, Case sensitive', default = 'A')
|
||||
arg_parser.add_argument('-l','--ligand', help='Ligand ID as per PDB, Case sensitive. REQUIRED only in "submit" stage')
|
||||
arg_parser.add_argument('-a','--affinity', help='Affinity in nM', default = 10)
|
||||
#arg_parser.add_argument('-p','--pdb_file', help = 'PDB File')
|
||||
arg_parser.add_argument('--datadir', help = 'Data Directory')
|
||||
arg_parser.add_argument('--debug', action='store_true', help = 'Debug Mode')
|
||||
|
||||
args = arg_parser.parse_args()
|
||||
|
||||
gene = args.gene
|
||||
drug = args.drug
|
||||
stage = args.stage
|
||||
|
||||
# Statics. Replace with argparse() later
|
||||
chain = args.chain
|
||||
ligand = args.ligand
|
||||
affinity = args.affinity
|
||||
#pdb_file = args.pdb_file
|
||||
datadir = args.datadir
|
||||
DEBUG = args.debug
|
||||
|
||||
# Actual Globals :-)
|
||||
host = args.host
|
||||
prediction_url = args.url
|
||||
|
||||
#host = "http://biosig.unimelb.edu.au"
|
||||
#prediction_url = f"{host}/mcsm_lig/prediction"
|
||||
#drug = 'isoniazid'
|
||||
|
@ -34,38 +45,48 @@ prediction_url = args.url
|
|||
# submit_mcsm globals
|
||||
homedir = os.path.expanduser('~')
|
||||
|
||||
os.chdir(homedir + '/git/LSHTM_analysis/mcsm')
|
||||
#os.chdir(homedir + '/git/LSHTM_analysis/mcsm')
|
||||
gene_match = gene + '_p.'
|
||||
datadir = homedir + '/git/Data'
|
||||
|
||||
indir = datadir + '/' + drug + '/' + 'input'
|
||||
outdir = datadir + '/' + drug + '/' + 'output'
|
||||
if datadir:
|
||||
basedir = datadir
|
||||
else:
|
||||
basedir = homedir + '/git/Data'
|
||||
|
||||
indir = basedir + '/' + drug + '/' + 'input'
|
||||
outdir = basedir + '/' + drug + '/' + 'output'
|
||||
|
||||
in_filename_pdb = gene.lower() + '_complex.pdb'
|
||||
infile_pdb = indir + '/' + in_filename_pdb
|
||||
|
||||
|
||||
#in_filename_snps = gene.lower() + '_mcsm_snps_test.csv' #(outfile2, from data_extraction.py)
|
||||
in_filename_snps = gene.lower() + '_mcsm_snps.csv' #(outfile2, from data_extraction.py)
|
||||
infile_snps = outdir + '/' + in_filename_snps
|
||||
|
||||
# mcsm_results globals
|
||||
result_urls_filename = gene.lower() + '_result_urls.txt'
|
||||
result_urls = outdir + '/' + result_urls_filename
|
||||
if DEBUG:
|
||||
print('DEBUG: Result URLs:', result_urls)
|
||||
|
||||
# mcsm_results globals
|
||||
print('infile:', result_urls)
|
||||
mcsm_output_filename = gene.lower() + '_mcsm_output.csv'
|
||||
mcsm_output = outdir + '/' + mcsm_output_filename
|
||||
if DEBUG:
|
||||
print('DEBUG: mCSM output CSV file:', mcsm_output)
|
||||
|
||||
# format_results globals
|
||||
print('infile:', mcsm_output)
|
||||
out_filename_format = gene.lower() + '_mcsm_processed.csv'
|
||||
outfile_format = outdir + '/' + out_filename_format
|
||||
if DEBUG:
|
||||
print('DEBUG: formatted CSV output:', outfile_format)
|
||||
#%%=====================================================================
|
||||
def submit_mcsm():
|
||||
my_chain = 'A'
|
||||
# my_ligand_id = 'DCS' # FIXME
|
||||
my_ligand_id = 'RMP' # FIXME
|
||||
my_affinity = 10
|
||||
|
||||
# Example:
|
||||
# chain = 'A'
|
||||
# ligand_id = 'RMP'
|
||||
# affinity = 10
|
||||
|
||||
print('Result urls and error file (if any) will be written in: ', outdir)
|
||||
|
||||
|
@ -76,10 +97,11 @@ def submit_mcsm():
|
|||
print('Total SNPs for', gene, ':', infile_snps_len)
|
||||
for mcsm_mut in mcsm_muts:
|
||||
print('Processing mutation: %s of %s' % (mut_count, infile_snps_len), mcsm_mut)
|
||||
print('Parameters for mcsm_lig:', in_filename_pdb, mcsm_mut, my_chain, my_ligand_id, my_affinity, prediction_url, outdir, gene)
|
||||
if DEBUG:
|
||||
print('DEBUG: Parameters for mcsm_lig:', in_filename_pdb, mcsm_mut, chain, ligand, affinity, prediction_url, outdir, gene)
|
||||
# function call: to request mcsm prediction
|
||||
# which writes file containing url for valid submissions and invalid muts to respective files
|
||||
holding_page = request_calculation(infile_pdb, mcsm_mut, my_chain, my_ligand_id, my_affinity, prediction_url, outdir, gene, host)
|
||||
holding_page = request_calculation(infile_pdb, mcsm_mut, chain, ligand, affinity, prediction_url, outdir, gene, host)
|
||||
time.sleep(1)
|
||||
mut_count += 1
|
||||
# result_url = write_result_url(holding_page, result_urls, host)
|
||||
|
|
|
@ -11,6 +11,21 @@ home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_residue_renumber /home
|
|||
#======================================================
|
||||
/home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_seq -a /home/tanu/git/Data/ethambutol/input/3byw.pdb > 3byw_seq.txt
|
||||
#/home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_seq -c A -a /home/tanu/git/Data/ethambutol/input/3byw.pdb > 3byw_seq.txt
|
||||
======
|
||||
# gidB
|
||||
=======
|
||||
/home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_seq -a /home/tanu/git/LSHTM_3TB/gid/docking/3g89.pdb > 3g89_seq.txt
|
||||
/home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_seq -a /home/tanu/git/LSHTM_3TB/gid/docking/gidb_chopin1.pdb > gidb_chopin1_seq.txt
|
||||
|
||||
alignment
|
||||
>3g89A_ATOM chain_length:238
|
||||
MFGKHPGGLSERGRALLLEGGKALGLDLKPHLEAFSRLYALLQEAGEEEVVVKHFLDSLTLLRLPLWQGPLRVLDLGTGA
|
||||
GFPGLPLKIVRPELELVLVDATRKKVAFVERAIEVLGLKGARALWGRAEVLAREAGHREAYARAVARAVAPLCVLSELLL
|
||||
PFLEVGGAAVAMKGPRVEEELAPLPPALERLGGRLGEVLALQLPLSGEARHLVVLEKTAPTPPAYPRRPGVPERHPLC
|
||||
>gidb_chopin1 _ATOM chain_length:224
|
||||
MSPIEPAASAIFGPRLGLARRYAEALAGPGVERGLVGPREVGRLWDRHLLNCAVIGELLERGDRVVDIGSGAGLPGVPLA
|
||||
IARPDLQVVLLEPLLRRTESLREMVTDLGVAVEIVRGRAEESWVQDQLGGSDAAVSRAVAALDKLTKWSMPLIRPNGRML
|
||||
AIKGERAHDEVREHRRVMIASGAVDVRVVTCGANYLRPPATVVFARRGKQIARGSARMASGGTA
|
||||
|
||||
#======================================================
|
||||
# pdb_mutator.py: mutate residue: FIXME, needs charm
|
||||
|
@ -26,7 +41,7 @@ home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_residue_renumber /home
|
|||
/home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_ligand /home/tanu/git/Data/ethambutol/input/7bvf.pdb
|
||||
|
||||
#======================================================
|
||||
# pdb_ligand_tt.py: list ligands for valid pdbs AND docked complexes (my use case)
|
||||
# pdb_hetatm.py: list ligands for valid pdbs AND docked complexes (my use case)
|
||||
#======================================================
|
||||
/home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_ligand_tt /home/tanu/git/Data/cycloserine/input/alr_complex.pdb
|
||||
/home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_ligand_tt /home/tanu/git/Data/pyrazinamide/input/pnca_complex.pdb
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue