#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Grid-search logistic regression with RFECV feature selection.

Two GridSearchCV runs are set up over a MinMaxScaler -> RFECV -> classifier
pipeline (one using the ClfSwitcher wrapper, one using LogisticRegression
directly); the second search is then evaluated on a blind test set.

Created on Mon May 16 05:59:12 2022
(earlier header in this file: Created on Tue Mar 15 11:09:50 2022)

@author: tanu
"""
#%% Import libraries, data, and scoring func: UQ_pnca_ML.py
# NOTE(review): this file contains no import statements. The sklearn classes,
# np, X, y, X_bts, y_bts, skf_cv, mcc_score_fn, matthews_corrcoef and
# accuracy_score are presumably provided by UQ_pnca_ML.py (e.g. run in the
# same interactive session) -- confirm how that script is loaded.

# Shared keyword arguments, unpacked into estimators / searches below.
rs = {'random_state': 42}
njobs = {'n_jobs': 10}


#%% Logistic Regression + hyperparam: BaseEstimator: ClfSwitcher()
class ClfSwitcher(BaseEstimator):
    """A custom BaseEstimator that can switch between classifiers.

    Every method delegates to the wrapped ``estimator``, so a parameter grid
    can replace the classifier itself (``clf__estimator``) and tune its
    hyperparameters (``clf__estimator__<name>``).

    :param estimator: sklearn object - The classifier
    """

    def __init__(
        self,
        # NOTE: this default instance is created once, when the class body is
        # executed, and is shared by every ClfSwitcher() built without an
        # explicit estimator.
        estimator=SGDClassifier(),
        #feature = RFECV(SGDClassifier())
    ):
        self.estimator = estimator
        #self.feature = feature

    def fit(self, X, y=None, **kwargs):
        """Fit the wrapped estimator on ``X``/``y`` and return ``self``.

        Extra keyword arguments are forwarded to the wrapped estimator's
        ``fit`` so that fit parameters are not silently discarded.
        """
        self.estimator.fit(X, y, **kwargs)
        #self.feature.fit(X, y)
        return self

    # def transform(self, X, y=None):
    #     #self.estimator.transform(X, y)
    #     self.feature.transform(X)
    #     return self

    def predict(self, X, y=None):
        """Return the wrapped estimator's predictions for ``X`` (``y`` is unused)."""
        return self.estimator.predict(X)

    def predict_proba(self, X):
        """Return the wrapped estimator's ``predict_proba`` output for ``X``."""
        return self.estimator.predict_proba(X)

    def score(self, X, y):
        """Return the wrapped estimator's ``score`` on ``X``/``y``."""
        return self.estimator.score(X, y)


#%%
# NOTE(review): GridSearchCV explores each dict of a list-style grid on its
# own, so the 'fs__*' settings below are NOT combined with the 'clf__*'
# settings -- merge the dicts if a joint search was intended.
# NOTE(review): 'elasticnet' is searched without an l1_ratio and 'none' is
# given as a string; check both against the installed scikit-learn version,
# since failing candidates would show up as NaN scores.
parameters = [
    # {'fs__feature__min_features_to_select': [1]
    #  , 'fs__feature__scoring': ['matthews_corrcoef']
    #  , 'fs__feature__cv': [skf_cv]},
    {
        'fs__min_features_to_select': [1],
        #'fs__scoring': ['matthews_corrcoef'],
        'fs__cv': [skf_cv],
    },
    {
        'clf__estimator': [LogisticRegression(**rs)],
        #'clf__estimator__C': np.logspace(0, 4, 10),
        'clf__estimator__penalty': ['none', 'l1', 'l2', 'elasticnet'],
        'clf__estimator__max_iter': list(range(100, 800, 100)),
        'clf__estimator__solver': ['saga'],
    },
    # {
    #     'clf__estimator': [MODEL2(**rs)],
    #     'clf__estimator__C': np.logspace(0, 4, 10),
    #     'clf__estimator__penalty': ['l2', 'none'],
    #     'clf__estimator__max_iter': list(range(100,800,100)),
    #     'clf__estimator__solver': ['newton-cg', 'lbfgs', 'sag']
    # },
]

#%% Create pipeline
pipeline = Pipeline([
    ('pre', MinMaxScaler()),
    # cant be my mcc_fn
    ('fs', RFECV(LogisticRegression(**rs), scoring='matthews_corrcoef')),
    # ('fs', ClfSwitcher()),
    ('clf', ClfSwitcher()),
])

#%%
# Grid search i.e hyperparameter tuning and refitting on mcc
gscv_lr = GridSearchCV(
    pipeline,
    parameters,
    scoring=mcc_score_fn,
    refit='mcc',
    cv=skf_cv,
    return_train_score=False,
    verbose=3,
    **njobs,
)

# Fit
# fit() returns the fitted search object, so one call is enough; calling it
# twice would only repeat the whole (expensive) grid search.
gscv_lr_fit = gscv_lr.fit(X, y)
gscv_lr_fit_be_mod = gscv_lr_fit.best_params_
gscv_lr_fit_be_res = gscv_lr_fit.cv_results_

#%% Grid search i.e hyperparameter tuning and refitting on mcc
# NOTE(review): as above, the three dicts are searched independently.
param_grid2 = [
    {
        'fs__min_features_to_select': [1],
        'fs__cv': [skf_cv],
    },
    {
        #'clf__estimator': [LogisticRegression(**rs)],
        'clf__C': np.logspace(0, 4, 10),
        'clf__penalty': ['l2'],
        'clf__max_iter': list(range(100, 200, 100)),
        #'clf__solver': ['newton-cg', 'lbfgs', 'sag']
        'clf__solver': ['sag'],
    },
    {
        #'clf__estimator': [LogisticRegression(**rs)],
        'clf__C': np.logspace(0, 4, 10),
        'clf__penalty': ['l1', 'l2'],
        'clf__max_iter': list(range(100, 200, 100)),
        'clf__solver': ['liblinear'],
    },
]

# step 4: create pipeline
pipeline = Pipeline([
    ('pre', MinMaxScaler()),
    #, ('fs', model_rfecv)
    ('fs', RFECV(LogisticRegression(**rs), scoring='matthews_corrcoef')),
    ('clf', LogisticRegression(**rs)),
])

# step 5: Perform Gridsearch CV
gs_final = GridSearchCV(
    pipeline,
    param_grid2,
    cv=skf_cv,
    scoring=mcc_score_fn,
    refit='mcc',
    verbose=1,
    return_train_score=False,
    **njobs,
)

#%% Fit
# Fit the search built in step 5. The original cell referenced `mod_fs`,
# which is not defined anywhere in this file.
mod_fs_fit = gs_final.fit(X, y)
mod_fs_fbm = mod_fs_fit.best_params_
mod_fs_fbmr = mod_fs_fit.cv_results_
mod_fs_fbs = mod_fs_fit.best_score_

print('Best model:\n', mod_fs_fbm)
print('Best models score:\n', mod_fs_fbs, ':', round(mod_fs_fbs, 2))

# nanmean: ignore NaN entries in the per-candidate mean test scores.
#print('\nMean test score from fit results:', round(mean(mod_fs_fbmr['mean_test_mcc']),2))
print('\nMean test score from fit results:',
      round(np.nanmean(mod_fs_fbmr['mean_test_mcc']), 2))

###############################################################################
#%% Blind test
######################################
# Blind test
######################################
test_predict = mod_fs_fit.predict(X_bts)
print(test_predict)

print('\nMCC on Blind test:',
      round(matthews_corrcoef(y_bts, test_predict), 2))
print('\nAccuracy on Blind test:',
      round(accuracy_score(y_bts, test_predict), 2))