#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 18 16:44:18 2022

@author: tanu
"""
# Custom GridSearch
# https://stackoverflow.com/questions/23045318/grid-search-over-multiple-classifiers

# NOTE(review): nothing is imported in this file. The scikit-learn names used
# below (BaseEstimator, SGDClassifier, Pipeline, MinMaxScaler,
# DecisionTreeClassifier, KNeighborsClassifier, GridSearchCV, make_scorer,
# matthews_corrcoef) and the objects rs, njobs, scoring_refit, skf_cv, X, y
# and pp are presumably provided by the session that runs these cells
# -- TODO confirm and add explicit imports.


#%% ClfSwitcher()
class ClfSwitcher(BaseEstimator):
    """Estimator wrapper that delegates fit/predict to a swappable classifier."""

    def __init__(
        self,
        estimator = SGDClassifier(),
    ):
        """
        A Custom BaseEstimator that can switch between classifiers.

        :param estimator: sklearn object - The classifier
        """
        # NOTE: the default SGDClassifier() is created once, when the class is
        # defined, so every ClfSwitcher built without an explicit estimator
        # holds the very same object.
        self.estimator = estimator

    def fit(self, X, y=None, **kwargs):
        """
        Fit the wrapped estimator on X and y.

        Extra keyword arguments are accepted but are not forwarded to the
        wrapped estimator.

        :return: self
        """
        self.estimator.fit(X, y)
        return self

    def predict(self, X, y=None):
        """Return the wrapped estimator's predictions for X (y is ignored)."""
        return self.estimator.predict(X)

    def predict_proba(self, X):
        """Return the wrapped estimator's class probabilities for X."""
        return self.estimator.predict_proba(X)

    #def score(self, X, y):
    #    return self.estimator.score(X, y)

    #def recall_score(self, X, y):
    #    return self.estimator.recall_score(X, y)


#%% Shared search space
def _build_search_space():
    """
    Build the (pipeline, parameter grid) pairs searched by the grid searches.

    Two models are covered: a decision tree (constructed with the keyword
    arguments in the module-level ``rs``) and a k-nearest-neighbours
    classifier, each preceded by a MinMaxScaler step.

    :return: list of (Pipeline, dict) tuples, one per model
    """
    tree_pipeline = Pipeline([
        ('pre', MinMaxScaler()),
        ('clf', DecisionTreeClassifier(**rs)),
    ])
    knn_pipeline = Pipeline([
        ('pre', MinMaxScaler()),
        ('clf', KNeighborsClassifier()),
    ])

    tree_grid = {
        'clf__max_depth': [2, 4, 6, 8, 10],
        'clf__criterion': ['gini', 'entropy'],
        # NOTE(review): "auto" is deprecated for DecisionTreeClassifier in
        # scikit-learn 1.1 and removed in 1.3 ("sqrt" is the replacement);
        # kept as-is here -- confirm against the installed version.
        "clf__max_features": ["auto", None],
        "clf__max_leaf_nodes": [10, 20, 30, 40],
    }
    knn_grid = {
        'clf__n_neighbors': [3, 7, 10],
        'clf__weights': ['uniform', 'distance'],
    }

    return [(tree_pipeline, tree_grid), (knn_pipeline, knn_grid)]


#%% Custom GridSearch: IntraModel[orig]
def grid_search2(input_df, target, skf_cv, var_type = ['numerical', 'categorical','mixed']):
    """
    Run a grid search per model and print the best parameters and score.

    :param input_df: feature data passed to GridSearchCV.fit
    :param target: target values passed to GridSearchCV.fit
    :param skf_cv: cross-validation strategy passed as ``cv``
    :param var_type: currently unused
    :return: None (results are only printed)

    Scoring/refit settings come from the module-level ``scoring_refit`` dict
    and further GridSearchCV keyword arguments from ``njobs``.
    """
    print("\nstarting Gridsearch")
    for i, (pipeline, param_grid) in enumerate(_build_search_space()):
        print('IIIII===>', i)
        gs = GridSearchCV(pipeline, param_grid
                          , cv = skf_cv
                          , **scoring_refit
                          #, refit=False
                          , **njobs
                          , verbose=3)
        # Fit on the data handed to this function rather than on globals X, y.
        gs = gs.fit(input_df, target)
        print("finished Gridsearch")
        print('\nBest model:', gs.best_params_)
        print('\nBest score:', gs.best_score_)


#%% Custom grid_search: Intra-Model [with return]
def grid_search(input_df, target
                , skf_cv
                , chosen_scoreD #scoring_refit
                #, var_type = ['numerical', 'categorical','mixed']
                ):
    """
    Run a grid search per model and collect each model's best result.

    :param input_df: feature data passed to GridSearchCV.fit
    :param target: target values passed to GridSearchCV.fit
    :param skf_cv: cross-validation strategy passed as ``cv``
    :param chosen_scoreD: dict of keyword arguments unpacked into
        GridSearchCV, e.g. {'scoring': 'recall', 'refit': 'recall'}
    :return: dict keyed by ``str(classifier)``; each value is a dict holding
        the best hyper-parameters, the entries of ``chosen_scoreD`` and the
        best score under the key 'best_score'

    Further GridSearchCV keyword arguments are taken from the module-level
    ``njobs`` dict.
    """
    # Local import so the function does not rely on the session for deepcopy.
    from copy import deepcopy

    print("\nStarting Gridsearch")

    # Run gridsearch for each model
    out = {}
    for i, (pipeline, param_grid) in enumerate(_build_search_space()):
        # The classifier is the second pipeline step; its repr names the model.
        model_name = str(pipeline.steps[1][1])

        print("\nStarting Gridsearch for model:", model_name, i)
        gs = GridSearchCV(pipeline, param_grid
                          , cv = skf_cv
                          #, **scoring_refit
                          #, refit=False
                          , **chosen_scoreD
                          , **njobs
                          , verbose=3)
        gs = gs.fit(input_df, target)
        print('=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=')
        print("Finished Gridsearch")
        # Report the scoring settings actually passed in, not a global.
        print('\nScore type to choose best score:', chosen_scoreD)

        best_score = gs.best_score_
        best_params = deepcopy(gs.best_params_)

        print('\nBest score:', best_score, '\ntype: ', type(best_score))
        print('\nBest params:', best_params, '\ntype: ', type(best_params))

        # Same key order and precedence as updating best_params in sequence.
        out[model_name] = {**best_params, **chosen_scoreD, 'best_score': best_score}

    # Returned once every model has been searched.
    return out


#%% call CUSTOM grid_search: INTRA model [with return]
# call
chosen_score = {'scoring': 'recall'
                ,'refit': 'recall'}

# 'refit' has to sit next to 'scoring' (both become GridSearchCV keyword
# arguments); nesting it inside the 'scoring' dict turned it into a scorer
# entry, which is why this variant did not work.
mcc_score_fn = {'chosen_scoreD': {'scoring': {'mcc': make_scorer(matthews_corrcoef)}
                                  ,'refit': 'mcc'}}

intra_models = grid_search(X, y
                           , skf_cv = skf_cv
                           , chosen_scoreD = chosen_score
                           )
# Alternative call using the MCC scorer:
#intra_models = grid_search(X, y, skf_cv = skf_cv, **mcc_score_fn)

pp.pprint(intra_models)

# TO DO: combine other score metrics for the best model: this will tell you
# which hyperparams are the best with each metric.
# Shouldn't be hard, as the dicts for a given model type will have the same shape.
# TO DO: see if you can specify a different score and append it to the model.