updated script to combine dfs

This commit is contained in:
Tanushree Tunstall 2020-08-21 13:22:28 +01:00
parent 208e0b6f62
commit 59cb57a795
3 changed files with 98 additions and 112 deletions

View file

@ -261,6 +261,20 @@ else:
sys.exit()
del(df_ncols, ncols_add)
#%% now adding mutation style = <gene>_p.abc1cde
# Build the column as '<gene>_<mut_info_f2>' and lowercase the whole string.
# astype(str) keeps the concatenation from failing on non-string entries.
dfm2_mis['mutation'] = (gene + '_' + dfm2_mis['mut_info_f2'].astype(str)).str.lower()

# quick sanity check: the frequency-of-frequencies table of the new column must
# match that of its source column.
# Series.equals() is used rather than '==' because '==' raises ValueError when
# the two Series are not identically labelled, which is precisely the failing
# case; the script would then crash before reaching the FAIL branch below.
# sort_index() makes the comparison independent of row order.
check = (dfm2_mis['mutation'].value_counts().value_counts().sort_index()
         .equals(dfm2_mis['mut_info_f2'].value_counts().value_counts().sort_index()))
if check:
    print('PASS: added column "mutation" containing mutation format: <gene>_p.abc1cde')
else:
    print('FAIL: could not add "mutation" column!')
    sys.exit()
#%% Calculating OR from beta coeff
print('Calculating OR...')
df_ncols = dfm2_mis.shape[1]
@ -364,7 +378,7 @@ print('Reordering', dfm2_mis.shape[1], 'columns'
#dfm2_mis.columns
column_order = [#'mutation',
column_order = ['mutation',
'mutationinformation',
'wild_type',
'position',