ML_AI_training/grid_search_vs_base_estimator.py

82 lines
No EOL
2.5 KiB
Python
Executable file

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 16 16:55:06 2022
@author: tanu
"""
# https://stackoverflow.com/questions/57248072/gridsearchcv-gives-different-result
#%% variables
# Scorer dict so that GridSearchCV can score and refit on Matthews
# correlation under the key 'mcc'.
mcc_score_fn = {'mcc': make_scorer(matthews_corrcoef)}

# Alternative scoring/refit keyword sets for GridSearchCV. Exactly one is
# selected below. These used to be three successive assignments to
# `scoring_refit`, of which only the last one (jaccard) ever took effect.
scoring_options = {
    'recall': {'scoring': 'recall', 'refit': 'recall'},
    'mcc': {'scoring': mcc_score_fn, 'refit': 'mcc'},
    'jaccard': {'scoring': 'jaccard', 'refit': 'jaccard'},
}
scoring_refit = scoring_options['jaccard']

njobs = {'n_jobs': 10}  # my desktop has 12 cores

# With shuffle=True and no random_state, every search that uses this splitter
# gets different folds, so two searches cannot be compared like for like and
# reruns are not reproducible. The seed value itself is arbitrary.
skf_cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Plain 10-fold alternative to `skf_cv` (pass as **cv).
cv = {'cv': 10}
#%% GridSearchCV
# Grid search over decision-tree hyperparameters, applied directly to the
# classifier (no pipeline). The metric and refit target come from
# `scoring_refit`; the folds come from `skf_cv`.
gs_dt = GridSearchCV(
    # NOTE: class_weight={1: 10, 0: 1} was previously tried on the tree.
    estimator=DecisionTreeClassifier(**rs),
    param_grid=[{
        'max_depth': [2, 4, 6, 8, 10],
        'criterion': ['gini', 'entropy'],
        # 'sqrt' is what 'auto' meant for DecisionTreeClassifier; 'auto' was
        # deprecated in scikit-learn 1.1 and removed in 1.3.
        'max_features': ['sqrt', None],
        'max_leaf_nodes': [10, 20, 30, 40],
    }],
    cv=skf_cv,  # alternative: **cv
    **scoring_refit,  # alternative: scoring=mcc_score_fn, refit='mcc'
)

# Fit exactly once: the search used to be fitted twice in a row, which
# doubled the run time while only the second result was kept.
# (X_train / y_train is the alternative data to fit on.)
gs_dt_fit = gs_dt.fit(X, y)
gs_dt_fit_res = gs_dt_fit.cv_results_

best_model = gs_dt.best_params_
best_model_score = gs_dt.best_score_
print('Best model:\n', best_model)
print('Best models score:\n', best_model_score)
#%% Check the scores:
# https://stackoverflow.com/questions/44947574/what-is-the-meaning-of-mean-test-score-in-cv-result
#print([(len(train), len(test)) for train, test in skf_cv.split(X, y)])
# Bare expression: only has a visible effect in an interactive session that
# echoes the value of the last expression in a cell.
gs_dt_fit.cv_results_
#%% BaseEstimator: pipeline search (metric taken from scoring_refit)
# Same decision-tree grid as the direct search, but run through a pipeline:
# MinMaxScaler followed by ClfSwitcher. ClfSwitcher is defined outside this
# section; the 'clf__estimator' keys below set its `estimator` parameter and
# that estimator's own hyperparameters.
pipeline = Pipeline([
    ('pre', MinMaxScaler()),
    ('clf', ClfSwitcher()),
])

parameters = [
    {
        'clf__estimator': [DecisionTreeClassifier(**rs)],
        'clf__estimator__max_depth': [2, 4, 6, 8, 10],
        'clf__estimator__criterion': ['gini', 'entropy'],
        # 'sqrt' is what 'auto' meant for DecisionTreeClassifier; 'auto' was
        # deprecated in scikit-learn 1.1 and removed in 1.3.
        'clf__estimator__max_features': ['sqrt', None],
        'clf__estimator__max_leaf_nodes': [10, 20, 30, 40],
    }
]

gscv = GridSearchCV(
    pipeline,
    parameters,
    cv=skf_cv,
    return_train_score=False,
    verbose=3,
    **njobs,
    **scoring_refit,
)
gscv.fit(X, y)

print('Best model:\n', gscv.best_params_)
print('Best models score:\n', gscv.best_score_)