#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed May 18 06:03:24 2022

@author: tanu
"""
#%% RandomForest + hyperparam: BaseEstimator: ClfSwitcher()
# NOTE(review): this cell uses names it does not define (BaseEstimator,
# SGDClassifier, RandomForestClassifier, Pipeline, MinMaxScaler, GridSearchCV,
# np, rs, njobs, skf_cv, mcc_score_fn, X, y, X_bts, y_bts). They must already
# be in scope when the cell is run.

# Kept inside the cell (not hoisted above the cell marker) so the cell still
# brings its own metric functions into scope when it is run by itself.
from sklearn.metrics import accuracy_score
from sklearn.metrics import matthews_corrcoef


class ClfSwitcher(BaseEstimator):
    """Wrapper estimator whose wrapped classifier is itself a parameter.

    Because the classifier is stored as the ``estimator`` init parameter, a
    parameter grid can set both the classifier (``clf__estimator``) and the
    classifier's own hyperparameters (``clf__estimator__<name>``).
    Every method simply delegates to the wrapped estimator.
    """

    # NOTE: the default SGDClassifier() is evaluated once, when the class is
    # defined, so all ClfSwitcher() instances created without an explicit
    # estimator share that same object. The default is kept unchanged so the
    # constructor interface stays the same for existing callers.
    def __init__(self, estimator=SGDClassifier()):
        """
        A Custom BaseEstimator that can switch between classifiers.

        :param estimator: sklearn object - The classifier
        """
        self.estimator = estimator

    def fit(self, X, y=None, **kwargs):
        """Fit the wrapped estimator on ``X``/``y`` and return ``self``.

        :param X: training data, passed through to ``estimator.fit``
        :param y: training targets, passed through to ``estimator.fit``
        :param kwargs: extra fit parameters, forwarded to ``estimator.fit``
            (previously these were accepted but silently discarded)
        :return: this ClfSwitcher instance
        """
        self.estimator.fit(X, y, **kwargs)
        return self

    def predict(self, X, y=None):
        """Return ``estimator.predict(X)``; ``y`` is accepted but unused."""
        return self.estimator.predict(X)

    def predict_proba(self, X):
        """Return ``estimator.predict_proba(X)``."""
        return self.estimator.predict_proba(X)

    def score(self, X, y):
        """Return ``estimator.score(X, y)``."""
        return self.estimator.score(X, y)


# Search space: the classifier itself plus its hyperparameters.
parameters = [
    {
        'clf__estimator': [
            RandomForestClassifier(**rs, **njobs, bootstrap=True, oob_score=True)
        ],
        'clf__estimator__max_depth': [4, 6, 8, 10, 12, 16, 20, None],
        'clf__estimator__class_weight': ['balanced', 'balanced_subsample'],
        'clf__estimator__n_estimators': [10, 25, 50, 100],
        'clf__estimator__criterion': ['gini', 'entropy'],  # , 'log_loss'
        # 'clf__estimator__max_features': ['auto', 'sqrt'],
        'clf__estimator__min_samples_leaf': [1, 2, 3, 4, 5, 10],
        'clf__estimator__min_samples_split': [2, 5, 15, 20],
    },
    # Alternative, much smaller grid (disabled):
    # {
    #     'clf__estimator': [
    #         RandomForestClassifier(**rs, **njobs, bootstrap=True, oob_score=True)
    #     ],
    #     'clf__estimator__max_depth': [6, 8, 10],
    #     'clf__estimator__class_weight': ['balanced_subsample'],
    #     'clf__estimator__n_estimators': [10],
    #     'clf__estimator__criterion': ['entropy'],
    #     # 'clf__estimator__max_features': ['auto', 'sqrt'],
    #     'clf__estimator__min_samples_leaf': [2, 8],
    #     'clf__estimator__min_samples_split': [20],
    # },
]

# Create pipeline
pipeline = Pipeline([
    ('pre', MinMaxScaler()),
    ('clf', ClfSwitcher()),
])

# Grid search i.e hyperparameter tuning and refitting on mcc
# NOTE(review): refit='mcc' and the 'mean_test_mcc' key read below presume
# that mcc_score_fn is a scorer mapping with an 'mcc' entry -- confirm where
# mcc_score_fn is defined.
gscv_rf = GridSearchCV(
    pipeline,
    parameters,
    # scoring='f1', refit='f1',
    scoring=mcc_score_fn,
    refit='mcc',
    cv=skf_cv,
    **njobs,
    return_train_score=False,
    verbose=3,
)

# Fit
gscv_rf_fit_be = gscv_rf.fit(X, y)

print('Best model:\n', gscv_rf.best_params_)
print('Best models score:\n', gscv_rf_fit_be.best_score_, ':',
      round(gscv_rf_fit_be.best_score_, 2))

# See how it does on the BLIND test
#print('\nBlind test score, mcc:', )
test_predict = gscv_rf_fit_be.predict(X_bts)
y_btsf = np.array(y_bts)
print(test_predict)
print(y_btsf)

#gscv_rf_fit_be.score(test_predict, y_btsf)
# Each metric is printed twice, once per argument order, as in the original
# sanity check.
print(accuracy_score(y_btsf, test_predict))
print(matthews_corrcoef(y_btsf, test_predict))
print(matthews_corrcoef(test_predict, y_btsf))
print(accuracy_score(test_predict, y_btsf))

#check_score = f1_score(y, gscv_rf.predict(X))
#check_score # should be the same as the best score when the same metric used!

# mod_pred = gscv_rf.predict(X_test)
# fscore = f1_score(y_test, mod_pred)
# fscore

gscv_rf_be_mod = gscv_rf.best_params_
print(gscv_rf_be_mod)

gscv_rf_fit_be_res = gscv_rf_fit_be.cv_results_
#print('\nMean test score from fit results:', round(mean(gscv_rf_fit_be_res['mean_test_mcc']),2))
# nanmean: average over all parameter combinations, ignoring NaN entries.
print('\nMean test score from fit results:',
      round(np.nanmean(gscv_rf_fit_be_res['mean_test_mcc']), 2))

# /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427
# : FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3.
# To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter
# as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
#   warn(

# ALL
# {'clf__estimator': RandomForestClassifier(class_weight='balanced_subsample', criterion='entropy',
#                                           max_depth=6, max_features='auto', min_samples_leaf=2,
#                                           min_samples_split=20, n_estimators=10, n_jobs=10,
#                                           oob_score=True, random_state=42)
# , 'clf__estimator__class_weight': 'balanced_subsample'
# , 'clf__estimator__criterion': 'entropy'
# , 'clf__estimator__max_depth': 6
# , 'clf__estimator__max_features': 'auto'
# , 'clf__estimator__min_samples_leaf': 2
# , 'clf__estimator__min_samples_split': 20
# , 'clf__estimator__n_estimators': 10}
#%%