updated counts.py with wt seq counts

This commit is contained in:
Tanushree Tunstall 2021-03-03 11:54:48 +00:00
parent 88229860e2
commit 59430a49dd
2 changed files with 19 additions and 1 deletions

View file

@ -310,3 +310,20 @@ n_stop = tot3/len(meta_data)
# n_stop scaled to a percentage. As a bare expression this only echoes a value
# when the cell is run in an interactive console; it has no effect in a script.
n_stop*100 #0.11
# Sum of n_del, n_ins and n_stop, each scaled by 100 (recorded result: 1.2).
# NOTE(review): presumably n_del and n_ins are fractions of len(meta_data)
# computed like n_stop; confirm against the lines above this cell.
all_tot = n_del*100 + n_ins*100 + n_stop*100 #1.2
#%% count pncA WT
# Rows where either mutation column mentions 'WT' (case-insensitive).
# Missing values are treated as "no match" via na=False.
has_wt_dr = meta_data[dr_muts_col].str.contains('WT', na=False, regex=True, case=False)
has_wt_other = meta_data[other_muts_col].str.contains('WT', na=False, regex=True, case=False)
wt_gene = meta_data.loc[has_wt_dr | has_wt_other]

# Cross-check route: concatenate both mutation columns with the lineage and
# search the combined string in one pass.
# NOTE(review): '+' on Series propagates NaN, so a row with a missing value in
# any of the three columns becomes NaN here and is dropped by na=False below;
# wt_gene_v2 can therefore be smaller than lin1_wt_gene.
meta_data['muts_and_lineage'] = meta_data[dr_muts_col] + meta_data[other_muts_col] + meta_data['lineage']

# The original pattern '*WT*lineage1' was shell-glob syntax; with regex=True a
# leading '*' is invalid ("nothing to repeat") and raised before any row was
# tested. 'WT.*lineage1' is the regex equivalent: 'WT' followed, anywhere
# later in the string, by 'lineage1'.
wt_gene_v2 = meta_data.loc[meta_data['muts_and_lineage'].str.contains(r'WT.*lineage1', na=False, regex=True, case=False)]

# Direct route: restrict the WT rows to those whose lineage mentions 'lineage1'.
# NOTE(review): a substring match would also hit e.g. 'lineage10' if such
# labels exist in the data; confirm.
lin1_wt_gene = wt_gene.loc[wt_gene['lineage'].str.contains('lineage1', na=False, regex=True, case=False)]

# Per-lineage number of rows whose column contains 'WT'. wt_gene keeps rows
# where only ONE of the two columns matched, so the other column may be NaN;
# na=False keeps the mask strictly boolean (a NaN in the mask makes pandas
# raise). Summing the boolean mask equals counting the matching rows.
# NOTE: this match is case-sensitive, unlike the row selection above.
dr_lin1 = wt_gene.groupby(['lineage'])[dr_muts_col].apply(lambda col: col.str.contains('WT', na=False).sum())
other_lin1 = wt_gene.groupby(['lineage'])[other_muts_col].apply(lambda col: col.str.contains('WT', na=False).sum())

# Full per-lineage frequency table of every distinct value in each column.
dr_lin1_v2 = wt_gene.groupby(['lineage'])[dr_muts_col].value_counts()
other_lin1_v2 = wt_gene.groupby(['lineage'])[other_muts_col].value_counts()