saving work, ready for more remote working
This commit is contained in:
parent
13203e6fe0
commit
dcd9a985ec
1 changed files with 27 additions and 1 deletions
|
@ -107,6 +107,11 @@ infile_master_v4 = datadir + '/' + in_filename_v4
|
||||||
print('Input file v4: ', infile_master_v4
|
print('Input file v4: ', infile_master_v4
|
||||||
, '\n============================================================')
|
, '\n============================================================')
|
||||||
|
|
||||||
|
# v5 master metadata input file; path is built the same way as for v2-v4.
in_filename_v5 = 'mtb_gwas_meta_v5.csv' #34k
infile_master_v5 = datadir + '/' + in_filename_v5
# Label says v5 (it previously read "v4", copied from the block above),
# so the log line matches the path actually being printed.
print('Input file v5: ', infile_master_v5
      , '\n============================================================')
|
||||||
|
|
||||||
#=======
|
#=======
|
||||||
# output
|
# output
|
||||||
#=======
|
#=======
|
||||||
|
@ -121,6 +126,7 @@ print('Output filename: in the respective sections'
|
||||||
master_data_v2 = pd.read_csv(infile_master_v2, sep = ',', dtype = 'unicode') # ascii
|
master_data_v2 = pd.read_csv(infile_master_v2, sep = ',', dtype = 'unicode') # ascii
|
||||||
master_data_v3 = pd.read_csv(infile_master_v3, sep = ',', dtype = 'unicode')
|
master_data_v3 = pd.read_csv(infile_master_v3, sep = ',', dtype = 'unicode')
|
||||||
master_data_v4 = pd.read_csv(infile_master_v4, sep = ',', dtype = 'unicode')
|
master_data_v4 = pd.read_csv(infile_master_v4, sep = ',', dtype = 'unicode')
|
||||||
|
master_data_v5 = pd.read_csv(infile_master_v5, sep = ',', dtype = 'unicode')
|
||||||
#DtypeWarning: Columns (48) have mixed types.Specify dtype option on import or set low_memory=False.
|
#DtypeWarning: Columns (48) have mixed types.Specify dtype option on import or set low_memory=False.
|
||||||
# interactivity=interactivity, compiler=compiler, result=result)
|
# interactivity=interactivity, compiler=compiler, result=result)
|
||||||
|
|
||||||
|
@ -188,6 +194,26 @@ v4 = master_data_v4[['id'
|
||||||
v4.isna().sum()
|
v4.isna().sum()
|
||||||
|
|
||||||
print('complete samples v4:', v4['id'].nunique() - v4[drug].isna().sum())
|
print('complete samples v4:', v4['id'].nunique() - v4[drug].isna().sum())
|
||||||
|
#==================================================================
|
||||||
|
# Per-column missing-value summary for the v5 master data:
# one row per column, with the NA count and the NA percentage.
v5_na = master_data_v5.isna().sum()
v5_na.name = "v5_na_count"
v5_na = v5_na.to_frame()
# Column is named for v5 (it was mislabelled 'v4_na_percent' although it is
# computed from master_data_v5). Scale to percent before rounding so the
# stored values are clean 2-decimal percentages without float noise.
v5_na['v5_na_percent'] = (master_data_v5.isna().mean() * 100).round(2)
|
||||||
|
|
||||||
|
# Subset of the v5 master data: sample id, geography/lineage columns, the
# drug column, and the two mutation columns (names are set earlier in the
# script).
v5 = master_data_v5[[
    'id',
    'country',
    'lineage',
    'sublineage',
    drug,
    dr_muts_col,
    other_muts_col,
]]

# Per-column NA counts; the result is only shown when run interactively.
v5.isna().sum()

# Unique sample ids minus the number of rows with a missing drug value.
print('complete samples v5:', v5['id'].nunique() - v5[drug].isna().sum())
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#====================================================================
|
#====================================================================
|
||||||
# checking ids
|
# checking ids
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue