diff --git a/UQ_LR_FS.py b/UQ_LR_FS_p1.py similarity index 97% rename from UQ_LR_FS.py rename to UQ_LR_FS_p1.py index 9110b77..4c444df 100644 --- a/UQ_LR_FS.py +++ b/UQ_LR_FS_p1.py @@ -230,19 +230,20 @@ print(confusion_matrix(y_bts, test_predict)) # ADD that within the loop # https://towardsdatascience.com/5-feature-selection-method-from-scikit-learn-you-should-know-ed4d116e4172 ##################### -from sklearn.feature_selection import RFE +from sklearn.feature_selection import RFECV from sklearn.linear_model import LogisticRegression from sklearn.feature_selection import SelectFromModel from sklearn.feature_selection import SequentialFeatureSelector # RFE: ~ model coef or feature_importance -rfe_selector = RFE(estimator = LogisticRegression(**rs +rfe_selector = RFECV(estimator = LogisticRegression(**rs , penalty='l1' , solver='saga' , max_iter = 100 , C= 1.0) - , n_features_to_select = None # median by default - , step = 1) + #, n_features_to_select = None # median by default + , step = 1 + , cv = 10) rfe_selector.fit(X, y) rfe_fs = X.columns[rfe_selector.get_support()] print('\nFeatures selected from Recursive Feature Elimination:', len(rfe_fs) diff --git a/UQ_pnca_ML.py b/UQ_pnca_ML.py index 293d878..45c88ab 100644 --- a/UQ_pnca_ML.py +++ b/UQ_pnca_ML.py @@ -13,6 +13,8 @@ import numpy as np import pprint as pp from copy import deepcopy from sklearn import linear_model +from sklearn import datasets + from sklearn.linear_model import LogisticRegression, LinearRegression from sklearn.naive_bayes import BernoulliNB from sklearn.neighbors import KNeighborsClassifier @@ -70,15 +72,15 @@ print(np.__version__) print(pd.__version__) from statistics import mean, stdev, median, mode -#from imblearn.over_sampling import RandomOverSampler -#from imblearn.over_sampling import SMOTE -#from imblearn.pipeline import Pipeline -#from sklearn.datasets import make_classification +from imblearn.over_sampling import RandomOverSampler +from imblearn.over_sampling import SMOTE +from imblearn.pipeline import Pipeline +from sklearn.datasets import make_classification from sklearn.model_selection import cross_validate, cross_val_score from sklearn.model_selection import RepeatedStratifiedKFold from sklearn.ensemble import AdaBoostClassifier -#from imblearn.combine import SMOTEENN -#from imblearn.under_sampling import EditedNearestNeighbours +from imblearn.combine import SMOTEENN +from imblearn.under_sampling import EditedNearestNeighbours from sklearn.model_selection import GridSearchCV from sklearn.base import BaseEstimator @@ -86,9 +88,9 @@ from sklearn.base import BaseEstimator scoring_fn = ({'accuracy' : make_scorer(accuracy_score) , 'fscore' : make_scorer(f1_score) , 'mcc' : make_scorer(matthews_corrcoef) - , 'precision' : make_scorer(precision_score) - , 'recall' : make_scorer(recall_score) - , 'roc_auc' : make_scorer(roc_auc_score) + , 'precision' : make_scorer(precision_score) + , 'recall' : make_scorer(recall_score) + , 'roc_auc' : make_scorer(roc_auc_score) , 'jcc' : make_scorer(jaccard_score) }) @@ -105,7 +107,7 @@ rskf_cv = RepeatedStratifiedKFold(n_splits = 10 ,**rs) mcc_score_fn = {'mcc': make_scorer(matthews_corrcoef)} - +jacc_score_fn = {'jcc': make_scorer(jaccard_score)} #%% homedir = os.path.expanduser("~") os.chdir(homedir + "/git/ML_AI_training/")