renamed UQ_LR_FS.py to UQ_LR_FS_p1.py

This commit is contained in:
Tanushree Tunstall 2022-05-21 04:24:28 +01:00
parent e16e82e673
commit 3742a5f62d
2 changed files with 17 additions and 14 deletions

View file

@ -230,19 +230,20 @@ print(confusion_matrix(y_bts, test_predict))
# ADD that within the loop
# https://towardsdatascience.com/5-feature-selection-method-from-scikit-learn-you-should-know-ed4d116e4172
#####################
from sklearn.feature_selection import RFE
from sklearn.feature_selection import RFECV
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import SelectFromModel
from sklearn.feature_selection import SequentialFeatureSelector
# RFE: recursively drops the weakest features, ranked by the model's coef_ or feature_importances_
rfe_selector = RFE(estimator = LogisticRegression(**rs
rfe_selector = RFECV(estimator = LogisticRegression(**rs
, penalty='l1'
, solver='saga'
, max_iter = 100
, C= 1.0)
, n_features_to_select = None # median by default
, step = 1)
#, n_features_to_select = None # RFE-only option (None keeps half of the features); RFECV picks the count via cross-validation
, step = 1
, cv = 10)
rfe_selector.fit(X, y)
rfe_fs = X.columns[rfe_selector.get_support()]
print('\nFeatures selected from Recursive Feature Elimination:', len(rfe_fs)

View file

@ -13,6 +13,8 @@ import numpy as np
import pprint as pp
from copy import deepcopy
from sklearn import linear_model
from sklearn import datasets
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.naive_bayes import BernoulliNB
from sklearn.neighbors import KNeighborsClassifier
@ -70,15 +72,15 @@ print(np.__version__)
print(pd.__version__)
from statistics import mean, stdev, median, mode
#from imblearn.over_sampling import RandomOverSampler
#from imblearn.over_sampling import SMOTE
#from imblearn.pipeline import Pipeline
#from sklearn.datasets import make_classification
from imblearn.over_sampling import RandomOverSampler
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_validate, cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.ensemble import AdaBoostClassifier
#from imblearn.combine import SMOTEENN
#from imblearn.under_sampling import EditedNearestNeighbours
from imblearn.combine import SMOTEENN
from imblearn.under_sampling import EditedNearestNeighbours
from sklearn.model_selection import GridSearchCV
from sklearn.base import BaseEstimator
@ -86,9 +88,9 @@ from sklearn.base import BaseEstimator
scoring_fn = ({'accuracy' : make_scorer(accuracy_score)
, 'fscore' : make_scorer(f1_score)
, 'mcc' : make_scorer(matthews_corrcoef)
, 'precision' : make_scorer(precision_score)
, 'recall' : make_scorer(recall_score)
, 'roc_auc' : make_scorer(roc_auc_score)
, 'precision' : make_scorer(precision_score)
, 'recall' : make_scorer(recall_score)
, 'roc_auc' : make_scorer(roc_auc_score)
, 'jcc' : make_scorer(jaccard_score)
})
@ -105,7 +107,7 @@ rskf_cv = RepeatedStratifiedKFold(n_splits = 10
,**rs)
mcc_score_fn = {'mcc': make_scorer(matthews_corrcoef)}
jacc_score_fn = {'jcc': make_scorer(jaccard_score)}
#%%
homedir = os.path.expanduser("~")
os.chdir(homedir + "/git/ML_AI_training/")