Added grid_search_vs_base_estimator.py to compare results from the BaseEstimator-based pipeline and a manual GridSearchCV run
This commit is contained in:
parent
b27bfa4a96
commit
d3b6fe13a6
1 changed files with 82 additions and 0 deletions
82
grid_search_vs_base_estimator.py
Normal file
82
grid_search_vs_base_estimator.py
Normal file
|
@ -0,0 +1,82 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Created on Wed Mar 16 16:55:06 2022
|
||||
|
||||
@author: tanu
|
||||
"""
|
||||
# https://stackoverflow.com/questions/57248072/gridsearchcv-gives-different-result
|
||||
#%% variables
# Multi-metric scorer mapping: the key 'mcc' is the name used for `refit`.
mcc_score_fn = {'mcc': make_scorer(matthews_corrcoef)}

# Candidate scoring/refit keyword sets for GridSearchCV. Keeping them in one
# table makes the active choice explicit instead of relying on whichever
# assignment to `scoring_refit` happens to come last.
scoring_options = {
    'recall': {'scoring': 'recall', 'refit': 'recall'},
    'mcc': {'scoring': mcc_score_fn, 'refit': 'mcc'},
    'jaccard': {'scoring': 'jaccard', 'refit': 'jaccard'},
}
scoring_refit = scoring_options['jaccard']

njobs = {'n_jobs': 10}  # my desktop has 12 cores

# Seed the shuffled splitter so that every search using `skf_cv` is evaluated
# on the same folds; an unseeded shuffle re-draws the folds on each split()
# call, which makes the two grid searches below incomparable.
# NOTE(review): `rs` is defined outside this file and is unpacked into the
# estimators as keyword arguments; presumably it holds 'random_state' --
# confirm. If the key is absent this falls back to None (unseeded).
skf_cv = StratifiedKFold(n_splits=10, shuffle=True,
                         random_state=rs.get('random_state'))
cv = {'cv': 10}
|
||||
|
||||
#%% GridSearchCV
# Plain grid search over a decision tree (no pipeline), scored and refit
# according to `scoring_refit`.
gs_dt = GridSearchCV(
    estimator=DecisionTreeClassifier(**rs),  # optional: class_weight={1: 10, 0: 1}
    param_grid=[{
        'max_depth': [2, 4, 6, 8, 10],
        'criterion': ['gini', 'entropy'],
        # 'sqrt' is what 'auto' meant for DecisionTreeClassifier; 'auto' was
        # deprecated and later removed from scikit-learn.
        'max_features': ['sqrt', None],
        'max_leaf_nodes': [10, 20, 30, 40],
    }],
    cv=skf_cv,  # alternative: **cv
    **scoring_refit  # alternative: scoring=mcc_score_fn, refit='mcc'
)

# Fit exactly once: fit() returns the fitted search object, so a separate
# bare gs_dt.fit(X, y) call beforehand only repeats the whole search.
# Alternative data: gs_dt.fit(X_train, y_train)
gs_dt_fit = gs_dt.fit(X, y)
gs_dt_fit_res = gs_dt_fit.cv_results_

print('Best model:\n', gs_dt.best_params_)
print('Best models score:\n', gs_dt.best_score_)

best_model = gs_dt.best_params_
best_model_score = gs_dt.best_score_

#%% Check the scores:
# https://stackoverflow.com/questions/44947574/what-is-the-meaning-of-mean-test-score-in-cv-result
#print([(len(train), len(test)) for train, test in skf_cv.split(X, y)])
# Bare expression kept for interactive inspection of the per-candidate results.
gs_dt_fit.cv_results_
|
||||
#%% BaseEstimator pipeline (metric taken from scoring_refit)
# Same decision-tree grid as the plain GridSearchCV, but routed through a
# scaling step and the ClfSwitcher wrapper so the two searches can be compared.
# NOTE(review): ClfSwitcher is defined outside this file; the 'clf__estimator'
# parameter names below assume it exposes an `estimator` parameter -- confirm.
pipeline = Pipeline([
    ('pre', MinMaxScaler()),
    ('clf', ClfSwitcher()),
])

parameters = [
    {
        'clf__estimator': [DecisionTreeClassifier(**rs)],
        'clf__estimator__max_depth': [2, 4, 6, 8, 10],
        'clf__estimator__criterion': ['gini', 'entropy'],
        # 'sqrt' is what 'auto' meant for DecisionTreeClassifier; 'auto' was
        # deprecated and later removed from scikit-learn.
        'clf__estimator__max_features': ['sqrt', None],
        'clf__estimator__max_leaf_nodes': [10, 20, 30, 40],
    }
]

gscv = GridSearchCV(
    pipeline,
    parameters,
    cv=skf_cv,
    **njobs,
    **scoring_refit,
    return_train_score=False,
    verbose=3
)

gscv.fit(X, y)
print('Best model:\n', gscv.best_params_)
print('Best models score:\n', gscv.best_score_)
|
Loading…
Add table
Add a link
Reference in a new issue