adding missing mutation col in combining_dfs
This commit is contained in:
parent
ddefcd7841
commit
645868ea27
1 changed file with 12 additions and 17 deletions
|
@@ -427,42 +427,37 @@ count_na_mut_cols = combined_df_all[check_mut_cols].isna().sum().reset_index().r
|
||||||
if (count_na_mut_cols['na_count'].sum() > 0).any():
|
if (count_na_mut_cols['na_count'].sum() > 0).any():
|
||||||
# FIXME: static override, generate 'mutation' from variable
|
# FIXME: static override, generate 'mutation' from variable
|
||||||
na_muts_n = combined_df_all['mutation'].isna().sum()
|
na_muts_n = combined_df_all['mutation'].isna().sum()
|
||||||
baz = combined_df_all[combined_df_all['mutation'].isna()]
|
#baz = combined_df_all[combined_df_all['mutation'].isna()]
|
||||||
baz = baz[check_mut_cols]
|
|
||||||
print(na_muts_n, 'mutations have missing \'mutation\' info.'
|
print(na_muts_n, 'mutations have missing \'mutation\' info.'
|
||||||
, '\nFetching these from reference dict...')
|
, '\nFetching these from reference dict...')
|
||||||
|
else:
|
||||||
|
print('No missing \'mutation\' has been detected!')
|
||||||
|
|
||||||
|
|
||||||
lookup_dict = dict()
|
lookup_dict = dict()
|
||||||
for k, v in oneletter_aa_dict.items():
|
for k, v in oneletter_aa_dict.items():
|
||||||
lookup_dict[k] = v['three_letter_code_lower']
|
lookup_dict[k] = v['three_letter_code_lower']
|
||||||
print(lookup_dict)
|
print(lookup_dict)
|
||||||
wt_3let = combined_df_all['wild_type'].map(lookup_dict).str.capitalize()
|
wt_3let = combined_df_all['wild_type'].map(lookup_dict)
|
||||||
#print(wt_3let)
|
#print(wt_3let)
|
||||||
pos = combined_df_all['position'].astype(str)
|
pos = combined_df_all['position'].astype(str)
|
||||||
#print(pos)
|
#print(pos)
|
||||||
mt_3let = combined_df_all['mutant_type'].map(lookup_dict).str.capitalize()
|
mt_3let = combined_df_all['mutant_type'].map(lookup_dict)
|
||||||
#print(mt_3let)
|
#print(mt_3let)
|
||||||
baz['mutation'] = 'pnca_p.' + wt_3let + pos + mt_3let
|
# override the 'mutation' column
|
||||||
|
combined_df_all['mutation'] = 'pnca_p.' + wt_3let + pos + mt_3let
|
||||||
print(combined_df_all['mutation'])
|
print(combined_df_all['mutation'])
|
||||||
|
|
||||||
# populate mut_info_f2
|
# populate mut_info_f2
|
||||||
combined_df_all['mut_info_f2'] = combined_df_all['mutation'].str.replace(gene_match.lower(), 'p.', regex = True)
|
combined_df_all['mut_info_f2'] = combined_df_all['mutation'].str.replace(gene_match.lower(), 'p.', regex = True)
|
||||||
|
|
||||||
#%% merge
|
#%% check
|
||||||
#merging_cols_m7 = detect_common_cols(combined_df_all, baz)
|
#cols_check = check_mut_cols + ['mut_info_f1', 'mut_info_f2']
|
||||||
|
#foo = combined_df_all[cols_check]
|
||||||
baz2 = baz[['mutationinformation', 'mut_info_f2']]
|
|
||||||
baz2 = baz2.drop_duplicates()
|
|
||||||
merging_cols_m7 = detect_common_cols(combined_df_all, baz2)
|
|
||||||
|
|
||||||
combined_df_all2 = pd.merge(combined_df_all, baz2
|
|
||||||
#, on = merging_cols_m7
|
|
||||||
, on = 'mutationinformation'
|
|
||||||
, how = o_join)
|
|
||||||
|
|
||||||
#%%============================================================================
|
#%%============================================================================
|
||||||
output_cols = combined_df_all.columns
|
output_cols = combined_df_all.columns
|
||||||
print('Output cols:', output_cols)
|
#print('Output cols:', output_cols)
|
||||||
|
|
||||||
#%%============================================================================
|
#%%============================================================================
|
||||||
# write csv
|
# write csv
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue