82 lines
No EOL
2.5 KiB
Python
Executable file
82 lines
No EOL
2.5 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
Created on Wed Mar 16 16:55:06 2022
|
|
|
|
@author: tanu
|
|
"""
|
|
# https://stackoverflow.com/questions/57248072/gridsearchcv-gives-different-result
|
|
#%% variables
# Scorer mapping: the key 'mcc' names a scorer built from matthews_corrcoef.
mcc_score_fn = dict(mcc=make_scorer(matthews_corrcoef))

# Keyword bundles meant to be unpacked into GridSearchCV with **.
# The three statements below rebind the same name in sequence, so only the
# LAST one (jaccard) is in effect for the cells that follow; the earlier two
# are kept as ready-made alternatives.
scoring_refit = dict(scoring='recall', refit='recall')
scoring_refit = dict(scoring=mcc_score_fn, refit='mcc')
scoring_refit = dict(scoring='jaccard', refit='jaccard')

njobs = dict(n_jobs=10)  # my desktop has 12 cores

# 10-fold stratified splitter with shuffling.
# NOTE(review): no random_state is passed, so fold membership may differ
# between runs -- confirm whether a fixed seed is wanted here.
skf_cv = StratifiedKFold(shuffle=True, n_splits=10)

# Plain integer-fold alternative; in the visible code it is only referenced
# from a commented-out `**cv`.
cv = dict(cv=10)
|
|
|
|
#%% GridSearchCV
# Grid search over decision-tree hyper-parameters. Folds come from skf_cv and
# the scoring/refit keywords are unpacked from scoring_refit.
gs_dt = GridSearchCV(
    estimator=DecisionTreeClassifier(
        **rs,
        # class_weight={1: 10, 0: 1},
    ),
    param_grid=[{
        'max_depth': [2, 4, 6, 8, 10],
        'criterion': ['gini', 'entropy'],
        # 'sqrt' replaces the former 'auto' alias, which scikit-learn
        # deprecated (1.1) and later removed (1.3) for decision trees; both
        # mean max_features = sqrt(n_features). best_params_ will therefore
        # report 'sqrt' where it used to report 'auto'.
        'max_features': ['sqrt', None],
        'max_leaf_nodes': [10, 20, 30, 40],
    }],
    # **cv,
    cv=skf_cv,
    **scoring_refit,
    # alternative: scoring=mcc_score_fn, refit='mcc'
)

# Fit exactly once. The original called fit() twice in a row, which repeated
# the whole search. fit() returns the fitted search object itself, so
# gs_dt and gs_dt_fit refer to the same object.
#gs_dt_fit = gs_dt.fit(X_train, y_train)
gs_dt_fit = gs_dt.fit(X, y)
gs_dt_fit_res = gs_dt_fit.cv_results_

print('Best model:\n', gs_dt.best_params_)
print('Best models score:\n', gs_dt.best_score_)

# The bare names below have no effect when the file runs as a script; they
# only echo the value when entered at an interactive prompt.
best_model = gs_dt.best_params_
best_model
best_model_score = gs_dt.best_score_
best_model_score

#%% Check the scores:
# https://stackoverflow.com/questions/44947574/what-is-the-meaning-of-mean-test-score-in-cv-result
#print([(len(train), len(test)) for train, test in skf_cv.split(X, y)])
gs_dt_fit.cv_results_
|
|
#%%BaseEstimator:Recall
# Same decision-tree grid as above, but run through a scaling pipeline whose
# final step is a ClfSwitcher wrapper (defined outside this section).
# NOTE: despite the cell title, the metric is whatever scoring_refit currently
# holds (set in the variables cell), not necessarily recall.
pipeline = Pipeline([
    ('pre', MinMaxScaler()),
    ('clf', ClfSwitcher()),
])

# Parameter names are routed through the pipeline step 'clf' and its
# 'estimator' attribute via the double-underscore convention.
parameters = [
    {
        'clf__estimator': [DecisionTreeClassifier(**rs)],
        'clf__estimator__max_depth': [2, 4, 6, 8, 10],
        'clf__estimator__criterion': ['gini', 'entropy'],
        # 'sqrt' replaces the former 'auto' alias, which scikit-learn
        # deprecated (1.1) and later removed (1.3) for decision trees; both
        # mean max_features = sqrt(n_features).
        'clf__estimator__max_features': ['sqrt', None],
        'clf__estimator__max_leaf_nodes': [10, 20, 30, 40],
    }
]

gscv = GridSearchCV(
    pipeline,
    parameters,
    cv=skf_cv,
    **njobs,
    **scoring_refit,
    return_train_score=False,
    verbose=3,
)

gscv.fit(X, y)
print('Best model:\n', gscv.best_params_)
print('Best models score:\n', gscv.best_score_)