saving data_extraction from home

This commit is contained in:
Tanushree Tunstall 2020-01-28 10:10:16 +00:00
parent be213cb7e9
commit 4bcb81e9be

View file

@@ -88,10 +88,10 @@ meta_data = meta_data[['id'
, 'pyrazinamide' , 'pyrazinamide'
, 'dr_mutations_pyrazinamide' , 'dr_mutations_pyrazinamide'
, 'other_mutations_pyrazinamide' , 'other_mutations_pyrazinamide'
]] #19265, 67 ]]
# checks # checks
total_samples = meta_data['id'].nunique() # 19265 total_samples = meta_data['id'].nunique()
# counts NA per column # counts NA per column
meta_data.isna().sum() meta_data.isna().sum()
@@ -101,9 +101,7 @@ meta_data.head()
# equivalent of table in R # equivalent of table in R
# pyrazinamide counts # pyrazinamide counts
meta_data.pyrazinamide.value_counts() #12511 meta_data.pyrazinamide.value_counts()
#0.0 10565
#1.0 1946 {RESULT: No. of Resistant and Susceptible samples}
clear variables clear variables
#del(basedir, datadir, inpath, infile) #del(basedir, datadir, inpath, infile)
@@ -160,7 +158,7 @@ del(meta_pnca_other)
# Now extract "all" mutations # Now extract "all" mutations
meta_pnca_all = meta_data_pnca.loc[meta_data_pnca.dr_mutations_pyrazinamide.str.contains('pncA_p.*') | meta_data_pnca.other_mutations_pyrazinamide.str.contains('pncA_p.*') ] meta_pnca_all = meta_data_pnca.loc[meta_data_pnca.dr_mutations_pyrazinamide.str.contains('pncA_p.*') | meta_data_pnca.other_mutations_pyrazinamide.str.contains('pncA_p.*') ]
#2665, 8
meta_pnca_all['id'].nunique() {#RESULT: pnca mutations in ALL samples} meta_pnca_all['id'].nunique() {#RESULT: pnca mutations in ALL samples}
pnca_samples = len(meta_pnca_all) pnca_samples = len(meta_pnca_all)