saved work before adding plots

2020-07-15 13:36:20 +01:00 · 2020-07-15 13:36:20 +01:00 · acc6a42880
commit acc6a42880
parent f8fef60475
1 changed file with 18 additions and 18 deletions
--- a/scripts/combining_dfs.py
+++ b/scripts/combining_dfs.py
@@ -56,28 +56,28 @@ os.getcwd()
 from combining_FIXME import detect_common_cols
 #=======================================================================
 #%% command line args
-#arg_parser = argparse.ArgumentParser()
+arg_parser = argparse.ArgumentParser()
-#arg_parser.add_argument('-d', '--drug', help='drug name', default = 'pyrazinamide')
+arg_parser.add_argument('-d', '--drug', help='drug name', default = 'pyrazinamide')
-#arg_parser.add_argument('-g', '--gene', help='gene name', default = 'pncA') # case sensitive
+arg_parser.add_argument('-g', '--gene', help='gene name', default = 'pncA') # case sensitive
-#arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data')
+arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data')
-#arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + <drug> + input')
+arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + <drug> + input')
-#arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + <drug> + output')
+arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + <drug> + output')
-#arg_parser.add_argument('--debug', action ='store_true', help = 'Debug Mode')
+arg_parser.add_argument('--debug', action ='store_true', help = 'Debug Mode')
-#args = arg_parser.parse_args()
+args = arg_parser.parse_args()
 #=======================================================================
 #%% variable assignment: input and output 
-drug = 'pyrazinamide'
+#drug = 'pyrazinamide'
-gene = 'pncA'
+#gene = 'pncA'
-gene_match = gene + '_p.'
+#gene_match = gene + '_p.'
-#drug = args.drug
+drug    = args.drug
-#gene = args.gene
+gene    = args.gene
-#datadir      = args.datadir
+datadir = args.datadir
-#indir        = args.input_dir
+indir   = args.input_dir
-#outdir       = args.output_dir
+outdir  = args.output_dir
 #%%=======================================================================
 #==============
 # directories
@@ -385,7 +385,7 @@ print('Checking mutations in the two dfs:'
 #print('\nNo. of common muts:', np.intersect1d(combined_df['mutationinformation'], ors_df_ordered['mutationinformation']) )
 #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-combined_df_all = pd.merge(combined_df, ors_df, on = merging_cols_m7, how = o_join)
+combined_df_all = pd.merge(combined_df, ors_df, on = merging_cols_m7, how = l_join)
 #combined_df_all.shape
 # FIXME: DIM
@@ -396,7 +396,7 @@ outdf_expected_cols = len(combined_df.columns) + len(ors_df_ordered.columns) - l
 #if combined_df_all.shape[1] == outdf_expected_cols and combined_df_all.shape[0] == outdf_expected_rows:  
 if combined_df_all.shape[1] == outdf_expected_cols and combined_df_all['mutationinformation'].nunique() == outdf_expected_rows:
    print('PASS: Df dimension match'
-          , '\nDim of combined_df_all with join type:', o_join
+          , '\nDim of combined_df_all with join type:', l_join
          , '\n', combined_df_all.shape
          , '\n===============================================================')
 else: