handling missing dir for data_extraction.py

This commit is contained in:
Tanushree Tunstall 2020-11-12 13:21:06 +00:00
parent c7194b7423
commit e67fbfd986
3 changed files with 51 additions and 17 deletions

View file

@ -59,14 +59,14 @@ import pandas as pd
import numpy as np
import argparse
#=======================================================================
#%% homdir and curr dir and local imports
#%% dir and local imports
# Home directory of the current user; the repository and data paths below
# are built relative to it.
homedir = os.path.expanduser('~')
# set working dir
# NOTE: the return value of os.getcwd() is discarded on both calls; they
# presumably exist only to show the cwd when run interactively -- confirm.
os.getcwd()
# Switch to the scripts directory of the repository. Presumably this is what
# lets the local modules imported below (reference_dict, tidy_split) resolve;
# NOTE(review): confirm, and note the path is hard-coded under homedir.
os.chdir(homedir + '/git/LSHTM_analysis/scripts')
os.getcwd()
# import aa dict
# Requires
from reference_dict import my_aa_dict # CHECK DIR STRUC THERE!
from tidy_split import tidy_split
#=======================================================================
@ -74,16 +74,52 @@ from tidy_split import tidy_split
# Command-line interface of the script.
# All options are optional at the parser level and default to None (or False
# for --debug); the directory defaults are filled in after parsing.
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('-d', '--drug', help='drug name (case sensitive)', default = None)
arg_parser.add_argument('-g', '--gene', help='gene name (case sensitive)', default = None)
arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assumes homedir + git/Data')
# The input/output defaults are derived from the data directory and the drug
# name, so the help text spells out that layout.
arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assumes <datadir>/<drug>/input')
arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assumes <datadir>/<drug>/output')
arg_parser.add_argument('--debug', action ='store_true', help = 'Debug Mode')
# Parse the command line once; everything below reads from these values.
args = arg_parser.parse_args()
#=======================================================================
#%% variable assignment: input and output paths & filenames
# Drug/gene selection as given on the command line (None when omitted).
drug, gene = args.drug, args.gene
# Optional directory overrides (None when omitted).
datadir, indir, outdir = args.datadir, args.input_dir, args.output_dir
#drug = 'pyrazinamide'
#gene = 'pncA'
#%% input and output dirs and files
#=======
# dirs
#=======
# Resolve the data, input and output directories.
# A value given on the command line wins; otherwise fall back to
#   datadir = <homedir>/git/Data
#   indir   = <datadir>/<drug>/input
#   outdir  = <datadir>/<drug>/output
if not datadir:
    datadir = homedir + '/' + 'git/Data'

# The default input/output paths are built from the drug name. Without it the
# string concatenation below would fail with a bare TypeError, so stop early
# with a readable message instead.
if drug is None and not (indir and outdir):
    print('ERROR: Drug name is required to derive the default input/output directories'
          , '\nPlease specify -d/--drug, or both -i/--input_dir and -o/--output_dir, and rerun')
    sys.exit(1)

if not indir:
    indir = datadir + '/' + drug + '/input'

if not outdir:
    outdir = datadir + '/' + drug + '/output'

# handle missing dirs here
# Each failure exits with a non-zero status so that shells and pipelines
# calling this script can detect the error (a bare sys.exit() reports success).
if not os.path.isdir(datadir):
    print('ERROR: Data directory does not exist:', datadir
          , '\nPlease create and ensure gwas data is present and then rerun')
    sys.exit(1)

if not os.path.isdir(indir):
    print('ERROR: Input directory does not exist:', indir
          , '\nPlease either create or specify indir and rerun')
    sys.exit(1)

if not os.path.isdir(outdir):
    print('ERROR: Output directory does not exist:', outdir
          , '\nPlease create or specify outdir and rerun')
    sys.exit(1)
#=======================================================================
# Build the '<gene>_p.' mutation pattern for the selected gene and report it.
gene_match = gene + '_p.'
print(' '.join(('mut pattern for gene', gene, ':', gene_match)))
@ -114,13 +150,6 @@ print('Extracting columns based on variables:\n'
, resistance_col
, '\n===============================================================')
#=======================================================================
#%% input and output dirs and files
#=======
# dirs
#=======
# NOTE(review): these three assignments are unconditional. If they are still
# present at this point in the script they overwrite any directory values set
# earlier (including ones passed on the command line). They look like the
# pre-change lines of this diff hunk -- confirm they are gone from the file.
datadir = homedir + '/' + 'git/Data'
indir = datadir + '/' + drug + '/' + 'input'
outdir = datadir + '/' + drug + '/' + 'output'
#=======
# input