Fixed the masking condition for per-gene ML training data and wrote out the revised mask files

This commit is contained in:
Tanushree Tunstall 2022-07-27 13:36:16 +01:00
parent 0adf69f75a
commit f4cab1fdfb
3 changed files with 46 additions and 26 deletions

View file

@@ -18,7 +18,6 @@ from SplitTTS import *
from MultClfs_SIMPLE import *
#%%
skf_cv = StratifiedKFold(n_splits = 10
, shuffle = True,**rs)
#sel_cv = logo
@@ -29,16 +28,16 @@ skf_cv = StratifiedKFold(n_splits = 10
gene_model_paramD = {'data_combined_model' : False
, 'use_or' : False
, 'omit_all_genomic_features': False
, 'write_maskfile' : False
, 'write_maskfile' : True
, 'write_outfile' : False }
#df = getmldata(gene, drug, **gene_model_paramD)
df = getmldata('pncA', 'pyrazinamide', **gene_model_paramD)
df = getmldata('embB', 'ethambutol' , **gene_model_paramD)
df = getmldata('katG', 'isoniazid' , **gene_model_paramD)
df = getmldata('rpoB', 'rifampicin' , **gene_model_paramD)
df = getmldata('gid' , 'streptomycin' , **gene_model_paramD)
#df = getmldata('alr' , 'cycloserine' , **combined_model_paramD)
#df = getmldata('embB', 'ethambutol' , **gene_model_paramD)
#df = getmldata('katG', 'isoniazid' , **gene_model_paramD)
#df = getmldata('rpoB', 'rifampicin' , **gene_model_paramD)
#df = getmldata('gid' , 'streptomycin' , **gene_model_paramD)
#df = getmldata('alr' , 'cycloserine' , **gene_model_paramD)
all(df.columns.isin(['gene_name'])) # should be False
spl_type = '70_30'