added different scaling options
This commit is contained in:
parent
ebef0c7967
commit
8d831f3613
3 changed files with 99 additions and 31 deletions
|
@ -14,7 +14,8 @@ sys.path
|
|||
# import
|
||||
from GetMLData import *
|
||||
from SplitTTS import *
|
||||
from MultClfs_fi import *
|
||||
#from MultClfs_fi import *
|
||||
from MultClfs import *
|
||||
|
||||
#%%
|
||||
# X,y = load_boston(return_X_y=True)
|
||||
|
@ -33,7 +34,7 @@ from MultClfs_fi import *
|
|||
|
||||
#%%
|
||||
|
||||
sel_cv = StratifiedKFold(n_splits = 10
|
||||
skf_cv = StratifiedKFold(n_splits = 10
|
||||
, shuffle = True,**rs)
|
||||
#sel_cv = logo
|
||||
# sel_cv = RepeatedStratifiedKFold(n_splits = 5
|
||||
|
@ -48,10 +49,21 @@ gene_model_paramD = {'data_combined_model' : False
|
|||
|
||||
#df = getmldata(gene, drug, **gene_model_paramD)
|
||||
df = getmldata('pncA', 'pyrazinamide', **gene_model_paramD)
|
||||
df = getmldata('embB', 'ethambutol' , **gene_model_paramD)
|
||||
df = getmldata('katG', 'isoniazid' , **gene_model_paramD)
|
||||
df = getmldata('rpoB', 'rifampicin' , **gene_model_paramD)
|
||||
df = getmldata('gid' , 'streptomycin' , **gene_model_paramD)
|
||||
#df = getmldata('alr' , 'cycloserine' , **combined_model_paramD)
|
||||
all(df.columns.isin(['gene_name'])) # should be False
|
||||
|
||||
|
||||
spl_type = '70_30'
|
||||
spl_type = '80_20'
|
||||
spl_type = 'sl'
|
||||
|
||||
df2 = split_tts(df
|
||||
, data_type = 'actual'
|
||||
, split_type = '70_30'
|
||||
, split_type = spl_type
|
||||
, oversampling = False
|
||||
, dst_colname = 'dst'
|
||||
, target_colname = 'dst_mode'
|
||||
|
@ -61,19 +73,43 @@ df2 = split_tts(df
|
|||
|
||||
all(df2['X'].columns.isin(['gene_name'])) # should be False
|
||||
|
||||
fooD = MultClfs_fi (input_df = df2['X']
|
||||
fooD = MultModelsCl(input_df = df2['X']
|
||||
, target = df2['y']
|
||||
, sel_cv = sel_cv
|
||||
, sel_cv = skf_cv
|
||||
, run_blind_test = True
|
||||
, blind_test_df = df2['X_bts']
|
||||
, blind_test_target = df2['y_bts']
|
||||
, tts_split_type = '70_30'
|
||||
, var_type = 'mixed'
|
||||
, tts_split_type = spl_type
|
||||
, resampling_type = 'none' # default
|
||||
)
|
||||
, var_type = ['mixed']
|
||||
, scale_numeric = ['min_max_neg']
|
||||
, return_formatted_output = False
|
||||
|
||||
)
|
||||
|
||||
for k, v in fooD.items():
|
||||
print('\nModel:', k
|
||||
, '\nTRAIN MCC:', fooD[k]['test_mcc']
|
||||
, '\nBTS MCC:' , fooD[k]['bts_mcc']
|
||||
, '\nDIFF:',fooD[k]['bts_mcc'] - fooD[k]['test_mcc'] )
|
||||
, '\nDIFF:',fooD[k]['bts_mcc'] - fooD[k]['test_mcc'] )
|
||||
|
||||
#%% CHECK SCALING
|
||||
embb_df = getmldata('embB', 'ethambutol' , **combined_model_paramD)
|
||||
all(embb_df.columns.isin(['gene_name'])) # should be False
|
||||
|
||||
scaler = MinMaxScaler(feature_range=(-1, 1))
|
||||
bar = embb_df[['vdwclashes_rr', 'electro_rr']]
|
||||
bar_df1 = scaler.fit_transform(bar)
|
||||
bar_df1 = pd.DataFrame(bar_df1)
|
||||
bar_df1.rename(columns = {0:'vdw_scaled', 1: 'ele_scaled'}, inplace = True)
|
||||
bar2 = pd.concat([bar, bar_df1], axis = 1)
|
||||
|
||||
|
||||
scaler2 = StandardScaler()
|
||||
baz = embb_df[['vdwclashes_rr', 'electro_rr']]
|
||||
baz_df1 = scaler2.fit_transform(baz)
|
||||
baz_df1 = pd.DataFrame(baz_df1)
|
||||
baz_df1.rename(columns = {0:'vdw_scaled', 1: 'ele_scaled'}, inplace = True)
|
||||
baz2 = pd.concat([baz, baz_df1], axis = 1)
|
||||
|
||||
a = pd.concat([bar2, baz2], axis = 1)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue