#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed May 18 00:02:15 2022

@author: tanu

Hyper-parameter tuning and model selection on the iris dataset.

The script runs three steps in order:

1. Fit a baseline ``LogisticRegression`` and print its accuracy.
2. Grid-search the penalty type and regularisation strength ``C`` of a
   logistic regression with 5-fold cross-validation.
3. Grid-search a one-step ``Pipeline`` whose ``classifier`` step is itself a
   hyper-parameter, so logistic regression and random forest candidates
   compete in a single search.
"""
#UQ
# https://towardsdatascience.com/hyper-parameter-tuning-and-model-selection-like-a-movie-star-a884b8ee8d68

# import packages
import numpy as np
from sklearn import linear_model, datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

# Loading dataset
iris = datasets.load_iris()
features = iris.data
target = iris.target

# Shared keyword arguments, unpacked into estimators for reproducibility.
rs = {'random_state': 42}
# NOTE: defined for convenience but not used by any call below.
njobs = {'n_jobs': 10}

# Baseline: logistic regression with default hyper-parameters.
logistic = LogisticRegression(**rs)
logistic.fit(features, target)
# Scored on the data it was fitted on, i.e. this is a training accuracy.
print(logistic.score(features, target))

# Create range of candidate penalty hyperparameter values
penalty = ['l1', 'l2']

# Create range of candidate regularization hyperparameter values C
# Choose 10 log-spaced values between 10**0 and 10**4
C = np.logspace(0, 4, 10)

# Create dictionary hyperparameter candidates.
# The default 'lbfgs' solver rejects penalty='l1', so every 'l1' candidate
# would fail to fit.  'saga' supports both penalties (and multiclass targets),
# so it is pinned here; it converges slowly on unscaled features, hence the
# larger iteration budget than the default of 100.
hyperparameters = dict(C=C, penalty=penalty, solver=['saga'], max_iter=[5000])

# Create grid search, and pass in all defined values
# the verbose parameter will give output updates as the calculations are
# complete.
gridsearch = GridSearchCV(logistic, hyperparameters, cv=5, verbose=1)

# select the best model and create a fit
best_model = gridsearch.fit(features, target)

# get_params() takes a boolean ``deep`` flag, not a list of names: index the
# returned dict to obtain a single parameter value.
print('Best Penalty:', best_model.best_estimator_.get_params()['penalty'])
print('Best C:', best_model.best_estimator_.get_params()['C'])
# NOTE: score() evaluates the refitted best estimator on the same data used
# for the search, so this is a training accuracy; the cross-validated mean
# score of the winning candidate is available as best_model.best_score_.
print("The mean accuracy of the model is:", best_model.score(features, target))

####################################
# Create a pipeline
pipe = Pipeline([("classifier", RandomForestClassifier(**rs))])

# Create dictionary with candidate learning algorithms and their
# hyperparameters
search_space = [
    # Logistic regression, L1 vs L2: needs a solver that accepts both.
    {
        "classifier": [LogisticRegression(**rs)],
        "classifier__penalty": ['l2', 'l1'],
        "classifier__C": np.logspace(0, 4, 10),
        "classifier__solver": ['saga'],
        "classifier__max_iter": [5000],
    },
    # Logistic regression, L2 only, comparing solvers.  Of these solvers
    # 'newton-cg' and 'sag' do not allow the L1 penalty ('saga' and
    # 'liblinear' do, but L1 is already covered by the grid above).
    {
        "classifier": [LogisticRegression(**rs)],
        "classifier__penalty": ['l2'],
        "classifier__C": np.logspace(0, 4, 10),
        "classifier__solver": ['newton-cg', 'saga', 'sag', 'liblinear'],
    },
    # Random forest.
    {
        "classifier": [RandomForestClassifier(**rs)],
        "classifier__n_estimators": [10, 100, 1000],
        "classifier__max_depth": [5, 8, 15, 25, 30, None],
        "classifier__min_samples_leaf": [1, 2, 5, 10, 15, 100],
        "classifier__max_leaf_nodes": [2, 5, 10],
    },
]

# create a gridsearch of the pipeline, then fit the best model
gridsearch = GridSearchCV(
    pipe,
    search_space,
    cv=5,
    verbose=0,
    n_jobs=-1,
    # scoring='accuracy',
)

# Fit grid search
best_model = gridsearch.fit(features, target)
print(best_model.best_estimator_)
# NOTE: training accuracy of the refitted best pipeline (see note above).
print("The mean accuracy of the model is:", best_model.score(features, target))