saved work before adding plots

This commit is contained in:
Tanushree Tunstall 2020-07-15 13:36:20 +01:00
parent f8fef60475
commit acc6a42880

View file

@ -56,28 +56,28 @@ os.getcwd()
from combining_FIXME import detect_common_cols from combining_FIXME import detect_common_cols
#======================================================================= #=======================================================================
#%% command line args #%% command line args
#arg_parser = argparse.ArgumentParser() arg_parser = argparse.ArgumentParser()
#arg_parser.add_argument('-d', '--drug', help='drug name', default = 'pyrazinamide') arg_parser.add_argument('-d', '--drug', help='drug name', default = 'pyrazinamide')
#arg_parser.add_argument('-g', '--gene', help='gene name', default = 'pncA') # case sensitive arg_parser.add_argument('-g', '--gene', help='gene name', default = 'pncA') # case sensitive
#arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data') arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data')
#arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + <drug> + input') arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + <drug> + input')
#arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + <drug> + output') arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + <drug> + output')
#arg_parser.add_argument('--debug', action ='store_true', help = 'Debug Mode') arg_parser.add_argument('--debug', action ='store_true', help = 'Debug Mode')
#args = arg_parser.parse_args() args = arg_parser.parse_args()
#======================================================================= #=======================================================================
#%% variable assignment: input and output #%% variable assignment: input and output
drug = 'pyrazinamide' #drug = 'pyrazinamide'
gene = 'pncA' #gene = 'pncA'
gene_match = gene + '_p.' #gene_match = gene + '_p.'
#drug = args.drug drug = args.drug
#gene = args.gene gene = args.gene
#datadir = args.datadir datadir = args.datadir
#indir = args.input_dir indir = args.input_dir
#outdir = args.output_dir outdir = args.output_dir
#%%======================================================================= #%%=======================================================================
#============== #==============
# directories # directories
@ -385,7 +385,7 @@ print('Checking mutations in the two dfs:'
#print('\nNo. of common muts:', np.intersect1d(combined_df['mutationinformation'], ors_df_ordered['mutationinformation']) ) #print('\nNo. of common muts:', np.intersect1d(combined_df['mutationinformation'], ors_df_ordered['mutationinformation']) )
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
combined_df_all = pd.merge(combined_df, ors_df, on = merging_cols_m7, how = o_join) combined_df_all = pd.merge(combined_df, ors_df, on = merging_cols_m7, how = l_join)
#combined_df_all.shape #combined_df_all.shape
# FIXME: DIM # FIXME: DIM
@ -396,7 +396,7 @@ outdf_expected_cols = len(combined_df.columns) + len(ors_df_ordered.columns) - l
#if combined_df_all.shape[1] == outdf_expected_cols and combined_df_all.shape[0] == outdf_expected_rows: #if combined_df_all.shape[1] == outdf_expected_cols and combined_df_all.shape[0] == outdf_expected_rows:
if combined_df_all.shape[1] == outdf_expected_cols and combined_df_all['mutationinformation'].nunique() == outdf_expected_rows: if combined_df_all.shape[1] == outdf_expected_cols and combined_df_all['mutationinformation'].nunique() == outdf_expected_rows:
print('PASS: Df dimension match' print('PASS: Df dimension match'
, '\nDim of combined_df_all with join type:', o_join , '\nDim of combined_df_all with join type:', l_join
, '\n', combined_df_all.shape , '\n', combined_df_all.shape
, '\n===============================================================') , '\n===============================================================')
else: else: