#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Mar 15 11:09:50 2022

@author: tanu

Logistic-regression hyperparameter tuning with RFECV feature selection.
A ClfSwitcher pipeline step lets the parameter grid swap the final
estimator. Similar to _p1 but with Clf_Switcher.
"""
#%% Import libraries, data, and scoring func: UQ_pnca_ML.py
# NOTE(review): this script assumes that UQ_pnca_ML.py has already provided
# the imports and data it references (np, Pipeline, RFECV, GridSearchCV,
# LogisticRegression, SGDClassifier, BaseEstimator, matthews_corrcoef,
# accuracy_score, skf_cv, mcc_score_fn, X, y, X_bts, y_bts) — confirm
# before running standalone.

rs = {'random_state': 42}
njobs = {'n_jobs': 10}

#%% Logistic Regression + hyperparam: BaseEstimator: ClfSwitcher()
class ClfSwitcher(BaseEstimator):
    """A Custom BaseEstimator that can switch between classifiers.

    :param estimator: sklearn object - The classifier
    """

    def __init__(self, estimator=SGDClassifier()):
        self.estimator = estimator

    def fit(self, X, y=None, **kwargs):
        # Delegate fitting to the wrapped estimator; return self so the
        # pipeline/grid-search fit chain works as usual.
        self.estimator.fit(X, y)
        return self

    def predict(self, X, y=None):
        return self.estimator.predict(X)

    def predict_proba(self, X):
        return self.estimator.predict_proba(X)

    def score(self, X, y):
        return self.estimator.score(X, y)

#%% Parameter grids.
# Each dict is an independent grid for GridSearchCV; grids that do not set
# 'classifier' use ClfSwitcher's default SGDClassifier.
parameters = [
    {'fs__min_features_to_select': [1],
     'fs__cv': [skf_cv]},
    {'fs__min_features_to_select': [1, 2]},
    {'classifier': [LogisticRegression()],
     # 'classifier__C': np.logspace(0, 4, 10),
     'classifier__C': [2, 2.8],
     'classifier__max_iter': [100],
     'classifier__penalty': ['l1', 'l2'],
     'classifier__solver': ['saga']}
]

#%% Create pipeline: RFECV feature selection, then the switchable classifier.
pipeline = Pipeline([
    # ('pre', MinMaxScaler())
    ('fs', RFECV(LogisticRegression(**rs), scoring='matthews_corrcoef')),  # cant be my mcc_fn
    ('classifier', ClfSwitcher())
])

#%%
# Grid search i.e hyperparameter tuning and refitting on mcc
gscv_lr = GridSearchCV(pipeline,
                       parameters,
                       scoring=mcc_score_fn, refit='mcc',
                       cv=skf_cv,
                       **njobs,
                       return_train_score=False,
                       verbose=3)

# Fit
gscv_lr.fit(X, y)
gscv_lr.best_estimator_
gscv_lr.best_params_
gscv_lr.best_score_

# Blind test
test_predict = gscv_lr.predict(X_bts)
print(test_predict)
print('\nMCC on Blind test:', round(matthews_corrcoef(y_bts, test_predict), 2))
print('\nAccuracy on Blind test:', round(accuracy_score(y_bts, test_predict), 2))

####
# BUG FIX: the original called gscv_lr.fit(X, y) a second time, refitting the
# entire grid search for no benefit. fit() returns self, so reuse the
# already-fitted object — identical results (random_state=42), half the work.
gscv_lr_fit = gscv_lr
gscv_lr_fit_be_mod = gscv_lr_fit.best_params_
gscv_lr_fit_be_res = gscv_lr_fit.cv_results_
gscv_lr_fit.best_score_

print('Best model:\n', gscv_lr_fit_be_mod)
print('Best models score:\n', gscv_lr_fit.best_score_, ':',
      round(gscv_lr_fit.best_score_, 2))
# Mean MCC across all candidate settings (NaN-safe: failed fits score NaN).
print('\nMean test score from fit results:',
      round(np.nanmean(gscv_lr_fit_be_res['mean_test_mcc']), 2))

#%% print selected features
# Now get the features out
all_features = gscv_lr.feature_names_in_
sel_features = X.columns[gscv_lr.best_estimator_.named_steps['fs'].get_support()]
n_sf = gscv_lr.best_estimator_.named_steps['fs'].n_features_

# get model name
# BUG FIX: the pipeline step is named 'classifier', not 'clf' (the 'clf'
# line was commented out); the original raised KeyError here.
model_name = gscv_lr.best_estimator_.named_steps['classifier']
b_model_params = gscv_lr.best_params_

print('\n========================================',
      '\nRunning model:',
      '\nModel name:', model_name,
      '\n===============================================',
      '\nRunning feature selection with RFECV for model',
      '\nTotal no. of features in model:', len(all_features),
      '\nThese are:\n', all_features, '\n\n',
      '\nNo of features for best model: ', n_sf,
      '\nThese are:', sel_features, '\n\n',
      '\nBest Model hyperparams:', b_model_params)

###############################################################################
#%% Blind test
######################################
# Blind test
######################################
test_predict = gscv_lr.predict(X_bts)
print(test_predict)
print('\nMCC on Blind test:', round(matthews_corrcoef(y_bts, test_predict), 2))
print('\nAccuracy on Blind test:', round(accuracy_score(y_bts, test_predict), 2))