From e6faf80c202bfc85c55c0e7691506f3eb3284b19 Mon Sep 17 00:00:00 2001
From: Tanushree Tunstall
Date: Thu, 14 Apr 2022 10:36:08 +0100
Subject: [PATCH] updating ambiguous muts manipulation section in data_extraction_v2

---
 scripts/data_extraction_v2.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/scripts/data_extraction_v2.py b/scripts/data_extraction_v2.py
index aa7ec93..c041db5 100644
--- a/scripts/data_extraction_v2.py
+++ b/scripts/data_extraction_v2.py
@@ -959,6 +959,23 @@ foo = bar.iloc[ambig_muts_rev_df.index]
 foo[['mutation', 'mutation_info', 'mutation_info_old']]
 
 # CHECK if there are still any ambiguous muts
+# Split rows by their 'mutation_info' label. groupby sorts the keys, so this
+# assumes exactly two labels with the dr label sorting first -- TODO confirm.
+muts_split_rev = list(bar.groupby('mutation_info'))
+dr_muts_rev = muts_split_rev[0][1].mutation
+other_muts_rev = muts_split_rev[1][1].mutation
+print('splitting muts by mut_info:', muts_split_rev)
+print('no.of dr_muts samples:', len(dr_muts_rev))
+print('no. of other_muts samples', len(other_muts_rev))
+
+# A mutation present in both groups is still ambiguous. Use .any() with a
+# logical 'or' rather than a bitwise '&' on the two overlap counts, since
+# e.g. 2 & 1 == 0 would hide a real overlap.
+if not (dr_muts_rev.isin(other_muts_rev).any() or other_muts_rev.isin(dr_muts_rev).any()):
+    print('\nAmbiguous muts corrected. Proceeding with downstream analysis')
+else:
+    print('\nAmbiguous muts NOT corrected. Quitting!')
+    sys.exit()
 #%% ROUND THE HOUSES: DELETE
 
 foo = ambig_muts_rev_df[['mutation', 'mutation_info_REV']]