#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon May 16 05:59:12 2022

@author: tanu
"""
#%% Import libraries, data, and scoring func: UQ_pnca_ML.py
# NOTE(review): sklearn classes (BaseEstimator, SGDClassifier, LogisticRegression,
# Pipeline, MinMaxScaler, SelectKBest, mutual_info_classif, GridSearchCV,
# matthews_corrcoef, accuracy_score), numpy as np, the data (X, y, X_bts, y_bts),
# the CV splitter skf_cv and the scorer dict mcc_score_fn are all expected to be
# in scope already — presumably provided by UQ_pnca_ML.py; confirm before running
# this file standalone.

rs = {'random_state': 42}
njobs = {'n_jobs': 10}

#%% Logistic Regression + hyperparam: BaseEstimator: ClfSwitcher()
class ClfSwitcher(BaseEstimator):
    """A custom BaseEstimator that can switch between classifiers.

    Used as the 'clf' pipeline step so GridSearchCV can swap whole
    estimators via the 'clf__estimator' parameter.
    """

    def __init__(self, estimator=SGDClassifier()):
        """
        :param estimator: sklearn object - The classifier
        """
        self.estimator = estimator

    def fit(self, X, y=None, **kwargs):
        # BUG FIX: forward fit kwargs (e.g. sample_weight) to the wrapped
        # estimator; the original accepted **kwargs but silently dropped them.
        self.estimator.fit(X, y, **kwargs)
        return self

    def predict(self, X, y=None):
        return self.estimator.predict(X)

    def predict_proba(self, X):
        return self.estimator.predict_proba(X)

    def score(self, X, y):
        return self.estimator.score(X, y)


# Hyperparameter grid for the 'clf' step.
# BUG FIX: penalty='elasticnet' with solver='saga' requires l1_ratio; the
# original single grid omitted it, so every elasticnet candidate failed at
# fit time. The grid is split so l1_ratio is supplied only where it applies.
parameters = [
    {
        'clf__estimator': [LogisticRegression(**rs)],
        'clf__estimator__C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
        'clf__estimator__penalty': ['none', 'l1', 'l2'],
        'clf__estimator__max_iter': list(range(100, 800, 100)),
        'clf__estimator__solver': ['saga']
    },
    {
        'clf__estimator': [LogisticRegression(**rs)],
        'clf__estimator__C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
        'clf__estimator__penalty': ['elasticnet'],
        # l1_ratio grid kept small to limit the candidate count; widen if needed.
        'clf__estimator__l1_ratio': [0.25, 0.5, 0.75],
        'clf__estimator__max_iter': list(range(100, 800, 100)),
        'clf__estimator__solver': ['saga']
    }
]

#%% Create pipeline: scale -> univariate feature selection -> switchable classifier
pipeline = Pipeline([
    ('pre', MinMaxScaler()),
    ('selector', SelectKBest(mutual_info_classif, k=6)),
    ('clf', ClfSwitcher())
])

#%% Grid search i.e hyperparameter tuning and refitting on mcc
mod_fs = GridSearchCV(pipeline,
                      parameters,
                      scoring=mcc_score_fn, refit='mcc',
                      cv=skf_cv,
                      **njobs,
                      return_train_score=False,
                      verbose=3)

#%% Fit
mod_fs_fit = mod_fs.fit(X, y)
mod_fs_fbm = mod_fs_fit.best_params_
mod_fs_fbmr = mod_fs_fit.cv_results_
mod_fs_fbs = mod_fs_fit.best_score_

print('Best model:\n', mod_fs_fbm)
print('Best models score:\n', mod_fs_fbs, ':', round(mod_fs_fbs, 2))
# nanmean: candidates that failed to fit leave NaN in cv_results_, which
# would otherwise poison a plain mean.
print('\nMean test score from fit results:', round(np.nanmean(mod_fs_fbmr['mean_test_mcc']), 2))

###############################################################################
#%% Blind test
######################################
# Blind test
######################################
test_predict = mod_fs_fit.predict(X_bts)
print(test_predict)
print('\nMCC on Blind test:', round(matthews_corrcoef(y_bts, test_predict), 2))
print('\nAccuracy on Blind test:', round(accuracy_score(y_bts, test_predict), 2))