saving from work

This commit is contained in:
Tanushree Tunstall 2020-02-27 15:16:20 +00:00
parent 61f8dc57c9
commit a5356cf88b
2 changed files with 33 additions and 9 deletions

View file

@ -118,14 +118,16 @@ clear variables
# Keep the rows of meta_data whose dr_mutations_pyrazinamide value matches the
# pattern 'pncA_p.*'; na = False makes rows with a missing value count as non-matching.
meta_pza = meta_data.loc[meta_data.dr_mutations_pyrazinamide.str.contains('pncA_p.*', na = False)]
#2163 {RESULT: samples with dr_muts}
# Distinct sample ids among those rows.
dr_id = meta_pza['id'].unique()
# Wrap in a Series so that .isin() can be called on it below.
dr_id = pd.Series(dr_id)
# Same selection on other_mutations_pyrazinamide; meta_pza is reused, so the
# dr_mutations rows selected above are overwritten here.
meta_pza = meta_data.loc[meta_data.other_mutations_pyrazinamide.str.contains('pncA_p.*', na = False)]
#526 (RESULT: samples with other_muts)
other_id = meta_pza['id'].unique()
other_id = pd.Series(other_id)
# FIXME: See if the sample ids are unique in each
# find any common IDs
# NOTE(review): other_id[1,1] indexes with the tuple (1, 1); other_id is
# presumably 1-D (built from .unique()), so this form looks invalid — confirm
# before running. The statement after it is the plain membership test.
dr_id.isin(other_id[1,1])
# Number of entries in dr_id that also occur in other_id.
dr_id.isin(other_id).sum()
# Remove the temporary name now that both id sets have been extracted.
del(meta_pza)
@ -159,8 +161,7 @@ del(meta_pnca_other)
# Now extract "all" mutations
# Keep rows of meta_data_pnca where either the dr_mutations_pyrazinamide or the
# other_mutations_pyrazinamide value matches the pattern 'pncA_p.*'.
# NOTE(review): no na= argument is passed to str.contains here; presumably
# meta_data_pnca has no missing values in these two columns — TODO confirm.
meta_pnca_all = meta_data_pnca.loc[meta_data_pnca.dr_mutations_pyrazinamide.str.contains('pncA_p.*') | meta_data_pnca.other_mutations_pyrazinamide.str.contains('pncA_p.*') ]
meta_pnca_all['id'].nunique() {#RESULT: pnca mutations in ALL samples}
# Number of distinct sample ids in meta_pnca_all; the result is not assigned to anything.
meta_pnca_all['id'].nunique() #RESULT: pnca mutations in ALL samples
# Row count of meta_pnca_all (rows, not distinct ids).
pnca_samples = len(meta_pnca_all)
# Number of rows whose 'pyrazinamide' value is missing.
pnca_na = meta_pnca_all['pyrazinamide'].isna().sum()
# Rows that do have a 'pyrazinamide' value.
comp_pnca_samples = pnca_samples - pnca_na
@ -170,8 +171,8 @@ comp_pnca_samples = pnca_samples - pnca_na
#=#=#=#=#=#=#
# sanity checks
# Frequency of each distinct value in the two mutation columns; these two
# results are not assigned to anything.
meta_pnca_all.dr_mutations_pyrazinamide.value_counts()
meta_pnca_all.other_mutations_pyrazinamide.value_counts()
# Same two tallies, this time kept in foo1 / foo2 so they can be inspected later.
foo1 = meta_pnca_all.dr_mutations_pyrazinamide.value_counts()
foo2 = meta_pnca_all.other_mutations_pyrazinamide.value_counts()
# more sanity checks
# !CAUTION!: muts will change depending on your gene
@ -182,7 +183,7 @@ meta_pnca_all.loc[meta_pnca_all.dr_mutations_pyrazinamide.str.contains('pncA_p.P
# Spot checks: select the rows of meta_pnca_all whose mutation column matches a
# given pattern. Only the selection assigned to m is kept; the others are not
# assigned to anything.
# NOTE(review): str.contains treats its pattern as a regular expression by
# default, so the '.' in these literals matches any character and the trailing
# '*' in 'Gln10Pro*' applies to the final 'o' — confirm this is intended.
meta_pnca_all.loc[meta_pnca_all.dr_mutations_pyrazinamide.str.contains('pncA_p.Val139Leu')]
meta_pnca_all.loc[meta_pnca_all.dr_mutations_pyrazinamide.str.contains('pncA_p.')] # exists # rows
# Same selection as the line above, stored in m.
m = meta_pnca_all.loc[meta_pnca_all.dr_mutations_pyrazinamide.str.contains('pncA_p.')] # exists # rows
meta_pnca_all.loc[meta_pnca_all.dr_mutations_pyrazinamide.str.contains('pncA_p.')] # exists # rows
# other_muts
meta_pnca_all.loc[meta_pnca_all.other_mutations_pyrazinamide.str.contains('pncA_p.Gln10Pro*')] # empty