handling missing dir for data_extraction.py
This commit is contained in:
parent
c7194b7423
commit
e67fbfd986
3 changed files with 51 additions and 17 deletions
|
@ -59,14 +59,14 @@ import pandas as pd
|
|||
import numpy as np
|
||||
import argparse
|
||||
#=======================================================================
|
||||
#%% homdir and curr dir and local imports
|
||||
#%% dir and local imports
|
||||
# Resolve the current user's home directory; later paths are built from it.
homedir = os.path.expanduser('~')

# Switch the working directory to the project's scripts folder
# (presumably so the local modules imported below can be found -- confirm).
os.getcwd()
scripts_dir = homedir + '/git/LSHTM_analysis/scripts'
os.chdir(scripts_dir)
os.getcwd()
|
||||
|
||||
# import aa dict
|
||||
# Requires
|
||||
from reference_dict import my_aa_dict # CHECK DIR STRUC THERE!
|
||||
from tidy_split import tidy_split
|
||||
#=======================================================================
|
||||
|
@ -74,16 +74,52 @@ from tidy_split import tidy_split
|
|||
# Command line interface.
# Every option is optional at the parser level: an option that is not supplied
# comes back as None (False for --debug). Directory defaults are filled in
# further down, once the drug name is known.
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('-d', '--drug', help='drug name (case sensitive)', default = None)
arg_parser.add_argument('-g', '--gene', help='gene name (case sensitive)', default = None)
arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assumes homedir + git/Data')
# Help text states the defaults the script actually applies:
# <datadir>/<drug>/input and <datadir>/<drug>/output.
arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assumes <datadir>/<drug>/input')
arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assumes <datadir>/<drug>/output')

arg_parser.add_argument('--debug', action ='store_true', help = 'Debug Mode')

args = arg_parser.parse_args()
#=======================================================================
#%% variable assignment: input and output paths & filenames
# Unpack the parsed options into the module-level names used by the rest of
# the script.
drug = args.drug
gene = args.gene
datadir = args.datadir
indir = args.input_dir
outdir = args.output_dir
|
||||
|
||||
#drug = 'pyrazinamide'
|
||||
#gene = 'pncA'
|
||||
|
||||
#%% input and output dirs and files
|
||||
#=======
|
||||
# dirs
|
||||
#=======
|
||||
# Fall back to the conventional layout for any directory that was not given
# on the command line: <homedir>/git/Data/<drug>/{input,output}.
if not datadir:
    datadir = homedir + '/' + 'git/Data'

# The default input/output locations are derived from the drug name, so it
# must be present whenever one of them has to be computed. Without this check
# a missing -d/--drug surfaces as a TypeError from the string concatenation.
if not drug and not (indir and outdir):
    print('ERROR: drug name not specified'
          , '\nPlease provide -d/--drug, or specify both indir and outdir, and rerun')
    sys.exit(1)

if not indir:
    indir = datadir + '/' + drug + '/input'

if not outdir:
    outdir = datadir + '/' + drug + '/output'

# handle missing dirs here
# Each failure exits with a non-zero status so that calling shells and
# pipelines see the error; a bare sys.exit() would report success (status 0).
if not os.path.isdir(datadir):
    print('ERROR: Data directory does not exist:', datadir
          , '\nPlease create and ensure gwas data is present and then rerun')
    sys.exit(1)
if not os.path.isdir(indir):
    print('ERROR: Input directory does not exist:', indir
          , '\nPlease either create or specify indir and rerun')
    sys.exit(1)
if not os.path.isdir(outdir):
    print('ERROR: Output directory does not exist:', outdir
          , '\nPlease create or specify outdir and rerun')
    sys.exit(1)
|
||||
#=======================================================================
|
||||
# The gene name is needed to build the mutation pattern. argparse leaves it as
# None when -g/--gene is omitted, which would make the concatenation below
# fail with an unhelpful TypeError, so stop early with a clear message.
# Raising SystemExit with a string prints it to stderr and exits with status 1.
if not gene:
    raise SystemExit('ERROR: gene name not specified. Please provide -g/--gene and rerun')

# Prefix used to recognise this gene's protein-level mutations: '<gene>_p.'
gene_match = gene + '_p.'
print('mut pattern for gene', gene, ':', gene_match)
|
||||
|
||||
|
@ -114,13 +150,6 @@ print('Extracting columns based on variables:\n'
|
|||
, resistance_col
|
||||
, '\n===============================================================')
|
||||
#=======================================================================
|
||||
#%% input and output dirs and files
|
||||
#=======
|
||||
# dirs
|
||||
#=======
|
||||
datadir = homedir + '/' + 'git/Data'
|
||||
indir = datadir + '/' + drug + '/' + 'input'
|
||||
outdir = datadir + '/' + drug + '/' + 'output'
|
||||
|
||||
#=======
|
||||
# input
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue