saving work, ready for more remote working
This commit is contained in:
parent
13203e6fe0
commit
dcd9a985ec
1 changed files with 27 additions and 1 deletions
|
@ -106,6 +106,11 @@ in_filename_v4 = 'mtb_gwas_meta_v4.csv' #34k
|
|||
infile_master_v4 = datadir + '/' + in_filename_v4
|
||||
print('Input file v4: ', infile_master_v4
|
||||
, '\n============================================================')
|
||||
|
||||
in_filename_v5 = 'mtb_gwas_meta_v5.csv' #34k
|
||||
infile_master_v5 = datadir + '/' + in_filename_v5
|
||||
# Report the path of the v5 master file. The label names v5 so the log line
# matches the variable being printed (it previously read "v4").
print('Input file v5: ', infile_master_v5
      , '\n============================================================')
|
||||
|
||||
#=======
|
||||
# output
|
||||
|
@ -120,7 +125,8 @@ print('Output filename: in the respective sections'
|
|||
#%% Read input file
|
||||
master_data_v2 = pd.read_csv(infile_master_v2, sep = ',', dtype = 'unicode') # ascii
|
||||
master_data_v3 = pd.read_csv(infile_master_v3, sep = ',', dtype = 'unicode')
|
||||
master_data_v4 = pd.read_csv(infile_master_v4, sep = ',', dtype = 'unicode')
|
||||
master_data_v4 = pd.read_csv(infile_master_v4, sep = ',', dtype = 'unicode')
|
||||
master_data_v5 = pd.read_csv(infile_master_v5, sep = ',', dtype = 'unicode')
|
||||
#DtypeWarning: Columns (48) have mixed types.Specify dtype option on import or set low_memory=False.
|
||||
# interactivity=interactivity, compiler=compiler, result=result)
|
||||
|
||||
|
@ -188,6 +194,26 @@ v4 = master_data_v4[['id'
|
|||
v4.isna().sum()
|
||||
|
||||
print('complete samples v4:', v4['id'].nunique() - v4[drug].isna().sum())
|
||||
#==================================================================
|
||||
v5_na = master_data_v5.isna().sum()
|
||||
v5_na.name = "v5_na_count"
|
||||
v5_na = v5_na.to_frame()
|
||||
# Per-column share of missing values in master_data_v5, as a percentage
# (fraction rounded to 4 decimals, then scaled by 100). The column is labelled
# for v5 because this frame summarises the v5 data, not v4.
v5_na['v5_na_percent'] = master_data_v5.isna().mean().round(4)*100
|
||||
|
||||
v5 = master_data_v5[['id'
|
||||
, 'country'
|
||||
, 'lineage'
|
||||
, 'sublineage'
|
||||
, drug
|
||||
, dr_muts_col
|
||||
, other_muts_col]]
|
||||
|
||||
v5.isna().sum()
|
||||
|
||||
print('complete samples v5:', v5['id'].nunique() - v5[drug].isna().sum())
|
||||
|
||||
|
||||
|
||||
|
||||
#====================================================================
|
||||
# checking ids
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue