added UQ_practice.py
This commit is contained in:
parent
ee163d3978
commit
a9dc3c43e5
1 changed files with 76 additions and 0 deletions
76
UQ_practice.py
Normal file
76
UQ_practice.py
Normal file
|
@ -0,0 +1,76 @@
|
|||
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Hyper-parameter tuning and model-selection practice on the iris dataset.

Fits a baseline logistic regression, tunes it with GridSearchCV, then runs a
pipeline-based grid search that selects between LogisticRegression and
RandomForestClassifier.

Created on Wed May 18 00:02:15 2022

@author: tanu
"""

# UQ
# Tutorial followed:
# https://towardsdatascience.com/hyper-parameter-tuning-and-model-selection-like-a-movie-star-a884b8ee8d68

# import packages
import numpy as np
from sklearn import linear_model, datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV  # FIX: was imported twice
from sklearn.pipeline import Pipeline

# Loading dataset
iris = datasets.load_iris()
features = iris.data
target = iris.target

# Shared keyword arguments (reused when constructing estimators)
rs = {'random_state': 42}
njobs = {'n_jobs': 10}

# Baseline: un-tuned logistic regression fit + resubstitution accuracy.
# FIX: use 'liblinear', which supports BOTH 'l1' and 'l2' penalties; the
# default 'lbfgs' solver rejects 'l1', so the grid below would fail on
# half of its candidates.
logistic = LogisticRegression(solver='liblinear', **rs)
logistic.fit(features, target)
print(logistic.score(features, target))

# Create range of candidate penalty hyperparameter values
penalty = ['l1', 'l2']

# Create range of candidate regularization hyperparameter values C:
# 10 values, logarithmically spaced between 10**0 and 10**4
C = np.logspace(0, 4, 10)

# Create dictionary of hyperparameter candidates
hyperparameters = dict(C=C, penalty=penalty)

# Create grid search, and pass in all defined values.
# verbose=1 prints progress updates as the fits complete.
gridsearch = GridSearchCV(logistic, hyperparameters, cv=5, verbose=1)

# Select the best model and create a fit
best_model = gridsearch.fit(features, target)

# FIX: get_params() takes no positional list argument (only the 'deep'
# keyword); passing ['penalty'] returned the WHOLE parameter dict. Index
# the returned dict instead, as is already done for 'C' below.
print('Best Penalty:', best_model.best_estimator_.get_params()['penalty'])
print('Best C:', best_model.best_estimator_.get_params()['C'])
print("The mean accuracy of the model is:", best_model.score(features, target))


####################################
# Model selection across algorithms via a pipeline.
# The 'classifier' step is a placeholder; each search_space entry swaps in
# its own estimator and hyperparameter grid.
pipe = Pipeline([("classifier", RandomForestClassifier(**rs))])

# Dictionary list of candidate learning algorithms and their hyperparameters
search_space = [
    # FIX: as above, 'liblinear' is needed so the 'l1' candidates can fit.
    {"classifier": [LogisticRegression(solver='liblinear')],
     "classifier__penalty": ['l2', 'l1'],
     "classifier__C": np.logspace(0, 4, 10),
     },
    {"classifier": [LogisticRegression()],
     # 'l2' only: 'newton-cg' and 'sag' do not allow the L1 penalty
     "classifier__penalty": ['l2'],
     "classifier__C": np.logspace(0, 4, 10),
     "classifier__solver": ['newton-cg', 'saga', 'sag', 'liblinear'],
     },
    {"classifier": [RandomForestClassifier(**rs)],
     "classifier__n_estimators": [10, 100, 1000],
     "classifier__max_depth": [5, 8, 15, 25, 30, None],
     "classifier__min_samples_leaf": [1, 2, 5, 10, 15, 100],
     "classifier__max_leaf_nodes": [2, 5, 10]},
]

# Create a grid search over the pipeline, then fit the best model.
# n_jobs=-1 uses all available cores.
gridsearch = GridSearchCV(pipe, search_space, cv=5, verbose=0,
                          n_jobs=-1,
                          # scoring='accuracy'
                          )

# Fit grid search
best_model = gridsearch.fit(features, target)
print(best_model.best_estimator_)
print("The mean accuracy of the model is:", best_model.score(features, target))
|
Loading…
Add table
Add a link
Reference in a new issue