Added a distinct lineage count (`lin_count`) for each mutation
This commit is contained in:
parent
67d9e6160a
commit
28d0d68413
1 changed files with 13 additions and 3 deletions
|
@ -175,6 +175,15 @@ data2.drop(['mutation'], axis=1, inplace=True)
|
||||||
|
|
||||||
#%% Process lineage info
|
#%% Process lineage info
|
||||||
# Add the number of distinct lineages each mutation is represented in
|
# Add the number of distinct lineages each mutation is represented in
|
||||||
|
# https://stackoverflow.com/questions/37189878/pandas-add-column-to-groupby-dataframe
|
||||||
|
# https://stackoverflow.com/questions/43847520/how-to-get-the-distinct-count-of-values-in-a-python-pandas-dataframe
|
||||||
|
data2.groupby('mutationinformation')['lineage'].size() # sample count
|
||||||
|
data2.groupby('mutationinformation')['sample'].size()
|
||||||
|
data2.groupby('mutationinformation')['lineage'].value_counts()
|
||||||
|
|
||||||
|
data2.groupby('mutationinformation')['lineage'].nunique()
|
||||||
|
data2['lin_count'] = data2['mutationinformation'].map(data2.groupby('mutationinformation')['lineage'].nunique())
|
||||||
|
|
||||||
#%% subset: equivalent of merged_df3?
|
#%% subset: equivalent of merged_df3?
|
||||||
# https://stackoverflow.com/questions/39900061/sort-lists-in-a-pandas-dataframe-column
|
# https://stackoverflow.com/questions/39900061/sort-lists-in-a-pandas-dataframe-column
|
||||||
|
|
||||||
|
@ -185,6 +194,7 @@ data2.drop(['mutation'], axis=1, inplace=True)
|
||||||
# data2['dst_multimode'].value_counts()
|
# data2['dst_multimode'].value_counts()
|
||||||
# data2.sort_values(['dst_multimode'], ascending=False)
|
# data2.sort_values(['dst_multimode'], ascending=False)
|
||||||
|
|
||||||
data_df3 = data2.drop_duplicates(['mutationinformation'])
|
#data_df3 = data2.drop_duplicates(['mutationinformation'])
|
||||||
data_df3_v2 = data2.drop_duplicates(['mutationinformation'])
|
#data_df3_v2 = data2.drop_duplicates(['mutationinformation'])
|
||||||
all(data_df3 == data_df3_v2)
|
#all(data_df3 == data_df3_v2)
|
||||||
|
#%%
|
Loading…
Add table
Add a link
Reference in a new issue