diff --git a/scripts/data_extraction.py b/scripts/data_extraction.py
index 262175e..b8694b5 100755
--- a/scripts/data_extraction.py
+++ b/scripts/data_extraction.py
@@ -122,7 +122,7 @@ outdir = datadir + '/' + drug + '/' + 'output'
 # input
 #=======
 #in_filename_master_master = 'original_tanushree_data_v2.csv' #19k
-in_filename_master = 'mtb_gwas_meta_v5.csv' #34k
+in_filename_master = 'mtb_gwas_meta_v6.csv' #35k
 infile_master = datadir + '/' + in_filename_master
 print('Input file: ', infile_master
       , '\n============================================================')
@@ -153,8 +153,7 @@ if in_filename_master == 'original_tanushree_data_v2.csv':
                              , drug
                              , dr_muts_col
                              , other_muts_col]]
-
-if in_filename_master == 'mtb_gwas_meta_v5.csv':
+else:
     core_cols = ['id'
                  , 'sample'
                  , 'patient_id'
@@ -174,12 +173,13 @@ if in_filename_master == 'mtb_gwas_meta_v5.csv':
                  , 'hiv_status'
                  , 'HIV_status'
                  , 'tissue_type'
-                 , 'isolation_source']
+                 , 'isolation_source'
+                 , resistance_col]
 
     variable_based_cols = [drug
                            , dr_muts_col
-                           , other_muts_col
-                           , resistance_col]
+                           , other_muts_col]
+                           #, resistance_col]
 
     cols_to_extract = core_cols + variable_based_cols
     print('Extracting', len(cols_to_extract), 'columns from master data')
@@ -343,7 +343,7 @@ if in_filename_master == 'original_tanushree_data_v2.csv':
                              ,'drtype'
                              , drug
                              , dr_muts_col]]
-if in_filename_master == 'mtb_gwas_meta_v3.csv':
+else:
     dr_based_cols = [drug, dr_muts_col]
     cols_to_extract = core_cols + dr_based_cols
     print('Extracting', len(cols_to_extract), 'columns from meta data')
@@ -392,9 +392,8 @@ if in_filename_master == 'original_tanushree_data_v2.csv':
                              , 'sublineage'
                              , 'drtype'
                              , drug
-                             , other_muts_col]]
-
-if in_filename_master == 'mtb_gwas_meta_v3.csv':
+                             , other_muts_col]]
+else:
     other_based_cols = [drug, other_muts_col]
     cols_to_extract = core_cols + other_based_cols
     print('Extracting', len(cols_to_extract), 'columns from meta data')