saving work, lineage bits are all over the place, need to rearrange

This commit is contained in:
Tanushree Tunstall 2022-04-14 19:39:47 +01:00
parent ae3a5500c9
commit e99c169b35

View file

@ -1712,6 +1712,11 @@ lineage_label_map = {'lineage1' : 'L1'
, 'lineageBOV' : 'LBOV'} , 'lineageBOV' : 'LBOV'}
foo['lineage'].value_counts() foo['lineage'].value_counts()
foo_updated = foo.replace(to_replace ='lineage', value = 'L', regex = True) # works
foo['lineage_labels'] = foo['lineage']
#df['team'] = df['team'].apply(lambda x: re.sub(r'[\n\r]*','', str(x)))
foo['lineage_labels'] = foo['lineage'].apply(lambda x: re.sub(r'lineage','L', str(x)))
lineage_label_numeric = {'lineage1' : 1 lineage_label_numeric = {'lineage1' : 1
, 'lineage2' : 2 , 'lineage2' : 2
@ -1736,6 +1741,7 @@ c2 = foo2[foo2.loc[:, 'MUT'].isin(['A102P'])]
c2['lineage_numeric'].value_counts() c2['lineage_numeric'].value_counts()
#%% Lineage counts (including the ones containing multiple entries) #%% Lineage counts (including the ones containing multiple entries)
# Get information about how many distinct lineages each mutation comes from # Get information about how many distinct lineages each mutation comes from