saving data_extraction from home

This commit is contained in:
Tanushree Tunstall 2020-01-28 10:10:16 +00:00
parent c3c50f65f2
commit 15391a5700

View file

@@ -88,10 +88,10 @@ meta_data = meta_data[['id'
, 'pyrazinamide'
, 'dr_mutations_pyrazinamide'
, 'other_mutations_pyrazinamide'
]] #19265, 67
]]
# checks
total_samples = meta_data['id'].nunique() # 19265
total_samples = meta_data['id'].nunique()
# counts NA per column
meta_data.isna().sum()
@@ -101,9 +101,7 @@ meta_data.head()
# equivalent of table in R
# pyrazinamide counts
meta_data.pyrazinamide.value_counts() #12511
#0.0 10565
#1.0 1946 {RESULT: No. of Resistant and Susceptible samples}
meta_data.pyrazinamide.value_counts()
clear variables
#del(basedir, datadir, inpath, infile)
@@ -160,7 +158,7 @@ del(meta_pnca_other)
# Now extract "all" mutations
meta_pnca_all = meta_data_pnca.loc[meta_data_pnca.dr_mutations_pyrazinamide.str.contains('pncA_p.*') | meta_data_pnca.other_mutations_pyrazinamide.str.contains('pncA_p.*') ]
#2665, 8
meta_pnca_all['id'].nunique() {#RESULT: pnca mutations in ALL samples}
pnca_samples = len(meta_pnca_all)