#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 16 16:55:06 2022

@author: tanu

Hyper-parameter grid search for a DecisionTreeClassifier, run twice:
once on the bare estimator and once inside a MinMaxScaler + ClfSwitcher
pipeline. Both searches share the same stratified 10-fold splitter and
the same scoring/refit configuration.

NOTE(review): no imports or data loading are visible in this script.
It presumably runs in a session where the scikit-learn names used below,
ClfSwitcher, rs (keyword arguments for the estimator), X and y are
already defined -- confirm before running it stand-alone.
"""
# https://stackoverflow.com/questions/57248072/gridsearchcv-gives-different-result

#%% variables
mcc_score_fn = {'mcc': make_scorer(matthews_corrcoef)}

# Available scoring/refit configurations. Exactly one is selected below;
# previously all three were assigned in sequence and only the last one
# ('jaccard') ever took effect.
scoring_options = {
    'recall': {'scoring': 'recall', 'refit': 'recall'},
    'mcc': {'scoring': mcc_score_fn, 'refit': 'mcc'},
    'jaccard': {'scoring': 'jaccard', 'refit': 'jaccard'},
}
scoring_refit = scoring_options['jaccard']

njobs = {'n_jobs': 10}  # my desktop has 12 cores

# A shuffling splitter without a seed yields different folds on every
# split() call, so repeated or separate searches are not comparable.
# Reuse the estimator's seed when rs provides one; if it does not, this
# falls back to None, i.e. the previous unseeded behaviour.
# NOTE(review): assumes rs is a dict (it is unpacked with ** below).
skf_cv = StratifiedKFold(
    n_splits=10,
    shuffle=True,
    random_state=rs.get('random_state'),
)
cv = {'cv': 10}  # plain 10-fold alternative to skf_cv (currently unused)

# Single source of truth for the decision-tree grid used by both searches.
# 'sqrt' replaces 'auto': for classifiers the two are equivalent, but
# 'auto' was deprecated in scikit-learn 1.1 and removed in 1.3.
dt_param_grid = {
    'max_depth': [2, 4, 6, 8, 10],
    'criterion': ['gini', 'entropy'],
    'max_features': ['sqrt', None],
    'max_leaf_nodes': [10, 20, 30, 40],
}

#%% GridSearchCV
gs_dt = GridSearchCV(
    estimator=DecisionTreeClassifier(**rs),
    param_grid=[dt_param_grid],
    cv=skf_cv,
    **scoring_refit,
)

# Fit exactly once: fit() returns the fitted search object itself, so a
# second call would only repeat the entire search.
gs_dt_fit = gs_dt.fit(X, y)
gs_dt_fit_res = gs_dt_fit.cv_results_

print('Best model:\n', gs_dt.best_params_)
print('Best models score:\n', gs_dt.best_score_)

best_model = gs_dt.best_params_
best_model_score = gs_dt.best_score_

#%% Check the scores:
# https://stackoverflow.com/questions/44947574/what-is-the-meaning-of-mean-test-score-in-cv-result
# Per-candidate, per-fold scores are available in gs_dt_fit_res.
# print([(len(train), len(test)) for train, test in skf_cv.split(X, y)])

#%% BaseEstimator (ClfSwitcher) pipeline, scored with scoring_refit
pipeline = Pipeline([
    ('pre', MinMaxScaler()),
    ('clf', ClfSwitcher()),
])

# Same grid as above, re-keyed so it addresses the estimator nested
# inside the pipeline's 'clf' step.
parameters = [
    {
        'clf__estimator': [DecisionTreeClassifier(**rs)],
        **{
            f'clf__estimator__{param}': values
            for param, values in dt_param_grid.items()
        },
    }
]

gscv = GridSearchCV(
    pipeline,
    parameters,
    cv=skf_cv,
    **njobs,
    **scoring_refit,
    return_train_score=False,
    verbose=3,
)

gscv.fit(X, y)

print('Best model:\n', gscv.best_params_)
print('Best models score:\n', gscv.best_score_)