diff --git a/scripts/data_extraction_v2.py b/scripts/data_extraction_v2.py index 70aeae3..16212e3 100644 --- a/scripts/data_extraction_v2.py +++ b/scripts/data_extraction_v2.py @@ -1712,6 +1712,11 @@ lineage_label_map = {'lineage1' : 'L1' , 'lineageBOV' : 'LBOV'} foo['lineage'].value_counts() +foo_updated = foo.replace(to_replace ='lineage', value = 'L', regex = True) # works +foo['lineage_labels'] = foo['lineage'] + +#df['team'] = df['team'].apply(lambda x: re.sub(r'[\n\r]*','', str(x))) +foo['lineage_labels'] = foo['lineage'].apply(lambda x: re.sub(r'lineage','L', str(x))) lineage_label_numeric = {'lineage1' : 1 , 'lineage2' : 2 @@ -1736,6 +1741,7 @@ c2 = foo2[foo2.loc[:, 'MUT'].isin(['A102P'])] c2['lineage_numeric'].value_counts() + #%% Lineage counts (including the ones containing multiple entries) # Get information about how many distinct lineages each mutation comes from