saved work before adding plots

This commit is contained in:
Tanushree Tunstall 2020-07-15 13:36:20 +01:00
parent e4270b67c8
commit e41fb78e37

View file

@ -56,28 +56,28 @@ os.getcwd()
from combining_FIXME import detect_common_cols
#=======================================================================
#%% command line args
# Builds the command-line parser and then binds drug, gene and the directory
# settings to module-level names.
# NOTE(review): this section appears to come from a commit diff in which the
# previous and the new version of each line are interleaved; commented-out and
# live copies of the same statement therefore sit next to each other. Confirm
# against the actual file which of the paired lines is current.
#arg_parser = argparse.ArgumentParser()
#arg_parser.add_argument('-d', '--drug', help='drug name', default = 'pyrazinamide')
#arg_parser.add_argument('-g', '--gene', help='gene name', default = 'pncA') # case sensitive
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('-d', '--drug', help='drug name', default = 'pyrazinamide')
arg_parser.add_argument('-g', '--gene', help='gene name', default = 'pncA') # case sensitive
#arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data')
#arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + <drug> + input')
#arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + <drug> + output')
# The three directory options below are declared without default=, so argparse
# leaves them as None when they are not passed. The home-directory fallbacks
# described in the help texts are not set here -- presumably applied in the
# "directories" section that follows; TODO confirm.
arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data')
arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + <drug> + input')
arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + <drug> + output')
#arg_parser.add_argument('--debug', action ='store_true', help = 'Debug Mode')
# store_true flag: args.debug is False unless --debug is given.
arg_parser.add_argument('--debug', action ='store_true', help = 'Debug Mode')
#args = arg_parser.parse_args()
args = arg_parser.parse_args()
#=======================================================================
#%% variable assignment: input and output
# NOTE(review): drug and gene are first set to hard-coded values here and are
# rebound from args further down. gene_match is built from the hard-coded
# gene ('pncA'), not from args.gene, and is not recomputed after the rebind
# within this section -- confirm which value is intended.
drug = 'pyrazinamide'
gene = 'pncA'
gene_match = gene + '_p.'
#drug = 'pyrazinamide'
#gene = 'pncA'
#gene_match = gene + '_p.'
#drug = args.drug
#gene = args.gene
#datadir = args.datadir
#indir = args.input_dir
#outdir = args.output_dir
# Values taken from the parsed command line; datadir, indir and outdir may be
# None when the corresponding option was omitted.
drug = args.drug
gene = args.gene
datadir = args.datadir
indir = args.input_dir
outdir = args.output_dir
#%%=======================================================================
#==============
# directories
@ -385,7 +385,7 @@ print('Checking mutations in the two dfs:'
#print('\nNo. of common muts:', np.intersect1d(combined_df['mutationinformation'], ors_df_ordered['mutationinformation']) )
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# Merge combined_df with ors_df on the columns listed in merging_cols_m7.
# NOTE(review): the two statements assign the same name and differ only in the
# join type (o_join vs l_join, both defined outside this section -- presumably
# outer and left joins; verify). As written, the second result overwrites the
# first. This looks like the before/after pair of a diff; confirm that only
# one of them is present in the actual file.
combined_df_all = pd.merge(combined_df, ors_df, on = merging_cols_m7, how = o_join)
combined_df_all = pd.merge(combined_df, ors_df, on = merging_cols_m7, how = l_join)
#combined_df_all.shape
# FIXME: DIM
@ -396,7 +396,7 @@ outdf_expected_cols = len(combined_df.columns) + len(ors_df_ordered.columns) - l
#if combined_df_all.shape[1] == outdf_expected_cols and combined_df_all.shape[0] == outdf_expected_rows:
if combined_df_all.shape[1] == outdf_expected_cols and combined_df_all['mutationinformation'].nunique() == outdf_expected_rows:
print('PASS: Df dimension match'
, '\nDim of combined_df_all with join type:', o_join
, '\nDim of combined_df_all with join type:', l_join
, '\n', combined_df_all.shape
, '\n===============================================================')
else: