Add missing 'mutation' column in combining_dfs
This commit is contained in:
parent
ddefcd7841
commit
645868ea27
1 changed file with 12 additions and 17 deletions
|
@ -427,42 +427,37 @@ count_na_mut_cols = combined_df_all[check_mut_cols].isna().sum().reset_index().r
|
|||
if (count_na_mut_cols['na_count'].sum() > 0).any():
|
||||
# FIXME: static override, generate 'mutation' from variable
|
||||
na_muts_n = combined_df_all['mutation'].isna().sum()
|
||||
baz = combined_df_all[combined_df_all['mutation'].isna()]
|
||||
baz = baz[check_mut_cols]
|
||||
#baz = combined_df_all[combined_df_all['mutation'].isna()]
|
||||
print(na_muts_n, 'mutations have missing \'mutation\' info.'
|
||||
, '\nFetching these from reference dict...')
|
||||
else:
|
||||
print('No missing \'mutation\' has been detected!')
|
||||
|
||||
|
||||
lookup_dict = dict()
|
||||
for k, v in oneletter_aa_dict.items():
|
||||
lookup_dict[k] = v['three_letter_code_lower']
|
||||
print(lookup_dict)
|
||||
wt_3let = combined_df_all['wild_type'].map(lookup_dict).str.capitalize()
|
||||
wt_3let = combined_df_all['wild_type'].map(lookup_dict)
|
||||
#print(wt_3let)
|
||||
pos = combined_df_all['position'].astype(str)
|
||||
#print(pos)
|
||||
mt_3let = combined_df_all['mutant_type'].map(lookup_dict).str.capitalize()
|
||||
mt_3let = combined_df_all['mutant_type'].map(lookup_dict)
|
||||
#print(mt_3let)
|
||||
baz['mutation'] = 'pnca_p.' + wt_3let + pos + mt_3let
|
||||
# override the 'mutation' column
|
||||
combined_df_all['mutation'] = 'pnca_p.' + wt_3let + pos + mt_3let
|
||||
print(combined_df_all['mutation'])
|
||||
|
||||
# populate mut_info_f2
|
||||
combined_df_all['mut_info_f2'] = combined_df_all['mutation'].str.replace(gene_match.lower(), 'p.', regex = True)
|
||||
|
||||
#%% merge
|
||||
#merging_cols_m7 = detect_common_cols(combined_df_all, baz)
|
||||
|
||||
baz2 = baz[['mutationinformation', 'mut_info_f2']]
|
||||
baz2 = baz2.drop_duplicates()
|
||||
merging_cols_m7 = detect_common_cols(combined_df_all, baz2)
|
||||
|
||||
combined_df_all2 = pd.merge(combined_df_all, baz2
|
||||
#, on = merging_cols_m7
|
||||
, on = 'mutationinformation'
|
||||
, how = o_join)
|
||||
#%% check
|
||||
#cols_check = check_mut_cols + ['mut_info_f1', 'mut_info_f2']
|
||||
#foo = combined_df_all[cols_check]
|
||||
|
||||
#%%============================================================================
|
||||
output_cols = combined_df_all.columns
|
||||
print('Output cols:', output_cols)
|
||||
#print('Output cols:', output_cols)
|
||||
|
||||
#%%============================================================================
|
||||
# write csv
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue