From e41fb78e37ba1913ec1a3228a9a7c4ad563c5e6e Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Wed, 15 Jul 2020 13:36:20 +0100 Subject: [PATCH] saved work before adding plots --- scripts/combining_dfs.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/scripts/combining_dfs.py b/scripts/combining_dfs.py index d5253b8..fef2779 100755 --- a/scripts/combining_dfs.py +++ b/scripts/combining_dfs.py @@ -56,28 +56,28 @@ os.getcwd() from combining_FIXME import detect_common_cols #======================================================================= #%% command line args -#arg_parser = argparse.ArgumentParser() -#arg_parser.add_argument('-d', '--drug', help='drug name', default = 'pyrazinamide') -#arg_parser.add_argument('-g', '--gene', help='gene name', default = 'pncA') # case sensitive +arg_parser = argparse.ArgumentParser() +arg_parser.add_argument('-d', '--drug', help='drug name', default = 'pyrazinamide') +arg_parser.add_argument('-g', '--gene', help='gene name', default = 'pncA') # case sensitive -#arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data') -#arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + + input') -#arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + + output') +arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data') +arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + + input') +arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + + output') -#arg_parser.add_argument('--debug', action ='store_true', help = 'Debug Mode') +arg_parser.add_argument('--debug', action ='store_true', help = 'Debug Mode') -#args = arg_parser.parse_args() +args = arg_parser.parse_args() #======================================================================= #%% variable assignment: input and output -drug = 'pyrazinamide' -gene = 'pncA' -gene_match = gene + '_p.' +#drug = 'pyrazinamide' +#gene = 'pncA' +#gene_match = gene + '_p.' -#drug = args.drug -#gene = args.gene -#datadir = args.datadir -#indir = args.input_dir -#outdir = args.output_dir +drug = args.drug +gene = args.gene +datadir = args.datadir +indir = args.input_dir +outdir = args.output_dir #%%======================================================================= #============== # directories @@ -385,7 +385,7 @@ print('Checking mutations in the two dfs:' #print('\nNo. of common muts:', np.intersect1d(combined_df['mutationinformation'], ors_df_ordered['mutationinformation']) ) #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -combined_df_all = pd.merge(combined_df, ors_df, on = merging_cols_m7, how = o_join) +combined_df_all = pd.merge(combined_df, ors_df, on = merging_cols_m7, how = l_join) #combined_df_all.shape # FIXME: DIM @@ -396,7 +396,7 @@ outdf_expected_cols = len(combined_df.columns) + len(ors_df_ordered.columns) - l #if combined_df_all.shape[1] == outdf_expected_cols and combined_df_all.shape[0] == outdf_expected_rows: if combined_df_all.shape[1] == outdf_expected_cols and combined_df_all['mutationinformation'].nunique() == outdf_expected_rows: print('PASS: Df dimension match' - , '\nDim of combined_df_all with join type:', o_join + , '\nDim of combined_df_all with join type:', l_join , '\n', combined_df_all.shape , '\n===============================================================') else: