From e67fbfd9864161ed522d72e4bf5bf7e31c6146b0 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Thu, 12 Nov 2020 13:21:06 +0000 Subject: [PATCH] handling missing dir for data_extraction.py --- scripts/data_extraction.py | 47 ++++++++++++++++++++++++++++++-------- scripts/reference_dict.py | 16 ++++++------- scripts/running_scripts | 5 ++++ 3 files changed, 51 insertions(+), 17 deletions(-) diff --git a/scripts/data_extraction.py b/scripts/data_extraction.py index cfced69..15f35e0 100755 --- a/scripts/data_extraction.py +++ b/scripts/data_extraction.py @@ -59,14 +59,14 @@ import pandas as pd import numpy as np import argparse #======================================================================= -#%% homdir and curr dir and local imports +#%% dir and local imports homedir = os.path.expanduser('~') # set working dir os.getcwd() os.chdir(homedir + '/git/LSHTM_analysis/scripts') os.getcwd() -# import aa dict +# Requires from reference_dict import my_aa_dict # CHECK DIR STRUC THERE! from tidy_split import tidy_split #======================================================================= @@ -74,16 +74,52 @@ from tidy_split import tidy_split arg_parser = argparse.ArgumentParser() arg_parser.add_argument('-d', '--drug', help='drug name (case sensitive)', default = None) arg_parser.add_argument('-g', '--gene', help='gene name (case sensitive)', default = None) +arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data') +arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + + input') +arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + + output') + +arg_parser.add_argument('--debug', action ='store_true', help = 'Debug Mode') + args = arg_parser.parse_args() #======================================================================= #%% variable assignment: input and output paths & filenames drug = args.drug gene = args.gene +datadir = args.datadir +indir = args.input_dir +outdir = args.output_dir #drug = 'pyrazinamide' #gene = 'pncA' +#%% input and output dirs and files +#======= +# dirs +#======= +if not datadir: + datadir = homedir + '/' + 'git/Data' + +if not indir: + indir = datadir + '/' + drug + '/input' + +if not outdir: + outdir = datadir + '/' + drug + '/output' + +# handle missing dirs here +if not os.path.isdir(datadir): + print('ERROR: Data directory does not exist:', datadir + , '\nPlease create and ensure gwas data is present and then rerun') + sys.exit() +if not os.path.isdir(indir): + print('ERROR: Input directory does not exist:', indir + , '\nPlease either create or specify indir and rerun') + sys.exit() +if not os.path.isdir(outdir): + print('ERROR: Output directory does not exist:', outdir + , '\nPlease create or specify outdir and rerun') + sys.exit() +#======================================================================= gene_match = gene + '_p.' print('mut pattern for gene', gene, ':', gene_match) @@ -114,13 +150,6 @@ print('Extracting columns based on variables:\n' , resistance_col , '\n===============================================================') #======================================================================= -#%% input and output dirs and files -#======= -# dirs -#======= -datadir = homedir + '/' + 'git/Data' -indir = datadir + '/' + drug + '/' + 'input' -outdir = datadir + '/' + drug + '/' + 'output' #======= # input diff --git a/scripts/reference_dict.py b/scripts/reference_dict.py index d012898..eff2211 100755 --- a/scripts/reference_dict.py +++ b/scripts/reference_dict.py @@ -41,9 +41,9 @@ datadir = homedir + '/' + 'git/Data' #======= in_filename = 'aa_codes.csv' infile = datadir + '/' + in_filename -print('Input filename:', in_filename - , '\nInput path:', datadir - , '\n============================================================') +#print('Input filename:', in_filename +# , '\nInput path:', datadir +# , '\n============================================================') #%% Read input file aa_table = pd.read_csv(infile) #20, 6 @@ -67,11 +67,11 @@ my_aa.index # using 'index' creates a dict of dicts # using 'records' creates a list of dicts my_aa_dict = my_aa.to_dict('index') #20, with 5 subkeys -print('Printing my_aa_dict:', my_aa_dict.keys()) +#print('Printing my_aa_dict:', my_aa_dict.keys()) #FIXME : use the below in all code low_3letter_dict = my_aa.to_dict('index') #20, with 5 subkeys -print('Printing lower-case 3 letter aa dict:',low_3letter_dict.keys()) +#print('Printing lower-case 3 letter aa dict:',low_3letter_dict.keys()) #------------------------ #2) 1-letter code as key @@ -81,7 +81,7 @@ aa_1let.columns aa_1let.index oneletter_aa_dict = aa_1let.to_dict('index') #20, with 5 subkeys -print('Printing one letter aa dict:', oneletter_aa_dict.keys()) +#print('Printing one letter aa dict:', oneletter_aa_dict.keys()) #------------------------ #3) amino acid name as key @@ -91,7 +91,7 @@ aa_name.columns aa_name.index aa_name_dict = aa_name.to_dict('index') #20, with 5 subkeys -print('Printing amino acid names aa dict:', aa_name_dict.keys()) +#print('Printing amino acid names aa dict:', aa_name_dict.keys()) #------------------------ #3) 3 letter uppercase as key @@ -101,7 +101,7 @@ aa_up3let.columns aa_up3let.index up_3letter_aa_dict = aa_up3let.to_dict('index') #20, with 5 subkeys -print('Printing upper case 3 letter aa dict:', up_3letter_aa_dict.keys()) +#print('Printing upper case 3 letter aa dict:', up_3letter_aa_dict.keys()) #================================================ # dict of aa with their corresponding properties diff --git a/scripts/running_scripts b/scripts/running_scripts index 9889ec8..60d5717 100644 --- a/scripts/running_scripts +++ b/scripts/running_scripts @@ -1,3 +1,8 @@ +#======== +# data extraction: Must be run first to extract mutations for each drug-gene combination +#======== +./data_extraction.py -d pyrazinamide -g pncA + #======== # foldx #========