#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Hyperparameter tuning and model selection practice with GridSearchCV
on the iris dataset.

Created on Wed May 18 00:02:15 2022

@author: tanu
"""

# UQ
# https://towardsdatascience.com/hyper-parameter-tuning-and-model-selection-like-a-movie-star-a884b8ee8d68

# import packages (duplicate GridSearchCV import and unused
# `linear_model` import removed)
import numpy as np
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

# Loading dataset: iris (150 samples, 4 numeric features, 3 classes).
iris = datasets.load_iris()
features = iris.data
target = iris.target

# Shared keyword-argument bundles used throughout the script.
rs = {'random_state': 42}
njobs = {'n_jobs': 10}

# Baseline: logistic regression with default hyperparameters,
# scored on the training data itself (resubstitution accuracy).
logistic = LogisticRegression(**rs)
logistic.fit(features, target)
print(logistic.score(features, target))

# Create range of candidate penalty hyperparameter values.
penalty = ['l1', 'l2']

# Create range of candidate regularization hyperparameter values C:
# 10 values, log-spaced between 10**0 and 10**4.
C = np.logspace(0, 4, 10)

# Create dictionary of hyperparameter candidates.
hyperparameters = dict(C=C, penalty=penalty)

# Bug fix: the default 'lbfgs' solver rejects penalty='l1', so every
# l1 candidate fit would error out. 'liblinear' supports both 'l1'
# and 'l2', making the whole grid valid.
grid_estimator = LogisticRegression(solver='liblinear', **rs)

# Create grid search and pass in all defined values.
# verbose=1 prints progress updates as the fits complete.
gridsearch = GridSearchCV(grid_estimator, hyperparameters, cv=5, verbose=1)
# Select the best model and create a fit. GridSearchCV.fit returns the
# (refitted) search object itself; `best_estimator_` holds the winner.
best_model = gridsearch.fit(features, target)

# Bug fix: get_params() accepts only a `deep` flag, so the original
# get_params(['penalty']) returned the WHOLE parameter dict instead of
# the chosen penalty. Call with no arguments and index the result,
# matching the 'Best C' line below.
print('Best Penalty:', best_model.best_estimator_.get_params()['penalty'])
print('Best C:', best_model.best_estimator_.get_params()['C'])
print("The mean accuracy of the model is:", best_model.score(features, target))


####################################
# Model selection across algorithms: a one-step pipeline whose
# "classifier" step is itself a searched hyperparameter, so the grid
# search compares logistic regression against random forests.
pipe = Pipeline([("classifier", RandomForestClassifier(**rs))])

# Dictionary list with candidate learning algorithms and their
# hyperparameters.
search_space = [
    # liblinear supports both penalties, so l1/l2 can be searched
    # together (the default lbfgs solver would reject 'l1').
    {"classifier": [LogisticRegression(solver='liblinear')],
     "classifier__penalty": ['l2', 'l1'],
     "classifier__C": np.logspace(0, 4, 10)
     },
    # These solvers are only searched with the 'l2' penalty here.
    # (NOTE: 'saga' and 'liblinear' do also support 'l1'; 'newton-cg'
    # and 'sag' do not.)
    {"classifier": [LogisticRegression()],
     "classifier__penalty": ['l2'],
     "classifier__C": np.logspace(0, 4, 10),
     "classifier__solver": ['newton-cg', 'saga', 'sag', 'liblinear']
     },
    {"classifier": [RandomForestClassifier(**rs)],
     "classifier__n_estimators": [10, 100, 1000],
     "classifier__max_depth": [5, 8, 15, 25, 30, None],
     "classifier__min_samples_leaf": [1, 2, 5, 10, 15, 100],
     "classifier__max_leaf_nodes": [2, 5, 10]}
    ]

# Create a grid search of the pipeline, then fit the best model.
# n_jobs=-1 parallelizes the candidate fits over all cores.
gridsearch = GridSearchCV(pipe, search_space, cv=5, verbose=0,
                          n_jobs=-1
                          # , scoring='accuracy'
                          )

best_model = gridsearch.fit(features, target)
print(best_model.best_estimator_)
print("The mean accuracy of the model is:", best_model.score(features, target))