From 01fbc2a87b41329c9e2531575e9194f4bcaefd96 Mon Sep 17 00:00:00 2001
From: Tanushree Tunstall
Date: Wed, 8 Jul 2020 20:30:32 +0100
Subject: [PATCH] ran foldx and mcsm (get) for 33k dataset

---
 foldx/runFoldx.py          | 15 +++++++--------
 mcsm/mcsm_wrapper.py       |  6 +++---
 scripts/data_extraction.py |  8 ++++----
 3 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/foldx/runFoldx.py b/foldx/runFoldx.py
index b1bbcd4..f11a72f 100755
--- a/foldx/runFoldx.py
+++ b/foldx/runFoldx.py
@@ -37,8 +37,8 @@ arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb fi
 arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + drug + output', default = None)


-arg_parser.add_argument('-f', '--pdb_file', help = 'PDB File to process. By default, it assmumes a file called gene_complex.pdb', default = None)
-arg_parser.add_argument('-m', '--mutation_file', help = 'Mutation list. By default, assumes a file called gene_test_snps.csv exists', default = None)
+arg_parser.add_argument('-f', '--pdb_file', help = 'PDB File to process. By default, it assmumes a file called _complex.pdb', default = None)
+arg_parser.add_argument('-m', '--mutation_file', help = 'Mutation list. By default, assumes a file called _snps.csv exists', default = None)

 arg_parser.add_argument('-c1', '--chain1', help = 'Chain1 ID', default = 'A') # case sensitive
 arg_parser.add_argument('-c2', '--chain2', help = 'Chain2 ID', default = 'B') # case sensitive
@@ -75,16 +75,15 @@ if not indir:
 if not outdir:
     outdir = datadir + '/' + drug + '/' + 'output'

-# FIXME:
-process_dir = datadir + '/' + drug +'/' + 'processing' # FIXME: this is a temporary directory and should be correctly handled
+process_dir = datadir + '/' + drug +'/' + 'processing'
+
 os.mkdir(process_dir)

 #=======
 # input
 #=======

 # FIXME
-
 if pdb_filename:
     pdb_name = Path(pdb_filename).stem
 else:
@@ -97,8 +96,8 @@ actual_pdb_filename = Path(infile_pdb).name
 if mut_filename:
     mutation_file = mut_filename
 else:
-    #mutation_file = gene.lower() + '_mcsm_snps.csv' #real
-    mutation_file = gene.lower() + '_test_snps.csv' #test
+    mutation_file = gene.lower() + '_mcsm_snps.csv' #real
+    #mutation_file = gene.lower() + '_test_snps.csv' #test

 infile_muts = outdir + '/' + mutation_file

@@ -184,7 +183,7 @@ def loadFiles(df):
 def main():
     pdbname = pdb_name
     comp = '' # for complex only
-    mut_filename = infile_muts #pnca_test_snps.csv
+    mut_filename = infile_muts #pnca_mcsm_snps.csv
     mutlist = formatMuts(mut_filename, pdbname)

     print(mutlist)
diff --git a/mcsm/mcsm_wrapper.py b/mcsm/mcsm_wrapper.py
index b280e65..9d34c4e 100755
--- a/mcsm/mcsm_wrapper.py
+++ b/mcsm/mcsm_wrapper.py
@@ -16,7 +16,7 @@ arg_parser.add_argument('-H', '--host', help='mCSM Server', default = 'http:/
 arg_parser.add_argument('-U', '--url', help='mCSM Server URL', default = 'http://biosig.unimelb.edu.au/mcsm_lig/prediction')
 arg_parser.add_argument('-c', '--chain', help='Chain ID as per PDB, Case sensitive', default = 'A')
 arg_parser.add_argument('-l','--ligand', help='Ligand ID as per PDB, Case sensitive. REQUIRED only in "submit" stage')
-arg_parser.add_argument('-a','--affinity', help='Affinity in nM', default = 10)
+arg_parser.add_argument('-a','--affinity', help='Affinity in nM', default = 0.99)
 #arg_parser.add_argument('-p','--pdb_file', help = 'PDB File')
 arg_parser.add_argument('--datadir', help = 'Data Directory')
 arg_parser.add_argument('--debug', action='store_true', help = 'Debug Mode')
@@ -60,8 +60,8 @@ in_filename_pdb = gene.lower() + '_complex.pdb'
 infile_pdb = indir + '/' + in_filename_pdb


-#in_filename_snps = gene.lower() + '_mcsm_snps_test.csv' #(outfile2, from data_extraction.py)
-in_filename_snps = gene.lower() + '_mcsm_snps.csv' #(outfile2, from data_extraction.py)
+#in_filename_snps = gene.lower() + '_mcsm_snps_test.csv'
+in_filename_snps = gene.lower() + '_mcsm_snps.csv' #(outfile_mcsm_snps, from data_extraction.py)
 infile_snps = outdir + '/' + in_filename_snps

 # mcsm_results globals
diff --git a/scripts/data_extraction.py b/scripts/data_extraction.py
index b82da65..e9500e3 100755
--- a/scripts/data_extraction.py
+++ b/scripts/data_extraction.py
@@ -73,11 +73,11 @@ arg_parser.add_argument('-g', '--gene', help='gene name (case sensitive)', defau
 args = arg_parser.parse_args()
 #=======================================================================
 #%% variable assignment: input and output paths & filenames
-drug = args.drug
-gene = args.gene
+#drug = args.drug
+#gene = args.gene

-#drug = 'pyrazinamide'
-#gene = 'pncA'
+drug = 'pyrazinamide'
+gene = 'pncA'

 gene_match = gene + '_p.'
 print('mut pattern for gene', gene, ':', gene_match)