diff --git a/MultClassPipe3.py b/MultClassPipe3.py
index a795779..5c7a780 100644
--- a/MultClassPipe3.py
+++ b/MultClassPipe3.py
@@ -126,14 +126,21 @@ def MultClassPipeSKFCV(input_df, target, skf_cv, var_type = ['numerical', 'categ
                                 n_estimators = 1000 )
     rf2 = RandomForestClassifier(
         min_samples_leaf = 5
-        , n_estimators = 1000
+        , n_estimators = 100 #10
         , bootstrap = True
         , oob_score = True
         , **njobs
         , **rs
         , max_features = 'auto')
-    xgb = XGBClassifier(**rs
-        , verbosity = 0, use_label_encoder =False)
+    xgb = XGBClassifier(**rs, verbosity = 0, use_label_encoder =False)
+
+    lda = LinearDiscriminantAnalysis()
+
+    mnb = MultinomialNB() # no **rs: MultinomialNB has no random_state parameter (assumes rs carries random_state)
+
+    pa = PassiveAggressiveClassifier(**rs, **njobs)
+
+    sgd = SGDClassifier(**rs, **njobs)
 
     models = [('Logistic Regression', log_reg)
         , ('Naive Bayes' , nb)
@@ -145,7 +152,11 @@ def MultClassPipeSKFCV(input_df, target, skf_cv, var_type = ['numerical', 'categ
         , ('Random Forest' , rf)
         , ('Naive Bayes' , nb)
         , ('Random Forest2' , rf2)
-        , ('XGBoost' , xgb)]
+        , ('XGBoost' , xgb)
+        , ('LDA' , lda)
+        , ('MultinomialNB' , mnb)
+        , ('PassiveAggresive' , pa)
+        , ('StochasticGDescent' , sgd)]
 
     mm_skf_scoresD = {}
 
diff --git a/classification_params_FS.py b/classification_params_FS.py
index 2e6e450..12f53bd 100644
--- a/classification_params_FS.py
+++ b/classification_params_FS.py
@@ -7,8 +7,6 @@
 # https://github.com/automl/auto-sklearn/tree/master/autosklearn/pipeline/components/classification
 
 # TOADD:
-# Extra Trees
-https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/extra_trees.py
 # LDA
 https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/lda.py
 # Multinomial_nb
@@ -565,13 +563,143 @@ param_grid_svc = [
 #        'clf': [SVC(**rs)],
 #        'clf__kernel': ['poly', 'rbf', 'sigmoid']
         , 'clf__kernel': ['rbf']
-        , 'clf__C' : [50, 10, 1.0, 0.1, 0.01]
+#        , 'clf__C' : [50, 10, 1.0, 0.1, 0.01]
         , 'clf__C' : [1, 0.03, 10, 100, 1000, 10000, 32768]
         , 'clf__gamma' : ['scale', 'auto']
         }
     ]
 
 
 
 
+#######################################################################
+#######################################################################
+#========
+# LDA
+# https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/lda.py
+#========
+
+estimator = LinearDiscriminantAnalysis()
+
+# Define pipeline with steps
+pipe_lda = Pipeline([
+        ('pre', MinMaxScaler())
+        , ('fs', RFECV(DecisionTreeClassifier(**rs), cv = cv, scoring = 'matthews_corrcoef'))
+#        , ('fs', RFECV(estimator, cv = cv, scoring = 'matthews_corrcoef'))
+        , ('clf', estimator)
+        ])
+
+# Define hyperparameter space to search for
+param_grid_lda = [
+        {
+        'fs__min_features_to_select' : [1,2]
+#        , 'fs__cv': [cv]
+        },
+
+        # 'svd' does not support shrinkage, so it gets its own grid
+        {
+#        'clf': [LinearDiscriminantAnalysis()],
+        'clf__solver' : ['svd'],
+        },
+
+        # shrinkage is only valid with the 'lsqr' and 'eigen' solvers
+        {
+        'clf__solver' : ['lsqr', 'eigen'],
+        'clf__shrinkage' : [None, 'auto', 0, 0.5, 1],
+        }
+]
 #######################################################################
+#========
+# Multinomial_nb
+# https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/multinomial_nb.py
+#========
+estimator = MultinomialNB() # no **rs: MultinomialNB has no random_state parameter (assumes rs carries random_state)
+
+# Define pipeline with steps
+pipe_mnb = Pipeline([
+        ('pre', MinMaxScaler())
+        , ('fs', RFECV(DecisionTreeClassifier(**rs), cv = cv, scoring = 'matthews_corrcoef'))
+#        , ('fs', RFECV(estimator, cv = cv, scoring = 'matthews_corrcoef'))
+        , ('clf', estimator)
+        ])
+
+# Define hyperparameter space to search for
+param_grid_mnb = [
+        {
+        'fs__min_features_to_select' : [1,2]
+#        , 'fs__cv': [cv]
+        },
+
+        {
+#        'clf': [MultinomialNB()],
+        'clf__alpha': [0.01, 0.1, 1, 20, 25, 50, 55, 100]
+
+
+        }
+]
+#######################################################################
+#========
+# passive_aggressive
+# https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/passive_aggressive.py
+#========
+
+estimator = PassiveAggressiveClassifier(**rs, **njobs)
+
+# Define pipeline with steps
+pipe_pa = Pipeline([
+        ('pre', MinMaxScaler())
+        , ('fs', RFECV(DecisionTreeClassifier(**rs), cv = cv, scoring = 'matthews_corrcoef'))
+#        , ('fs', RFECV(estimator, cv = cv, scoring = 'matthews_corrcoef'))
+        , ('clf', estimator)
+        ])
+
+# Define hyperparameter space to search for
+param_grid_pa = [
+        {
+        'fs__min_features_to_select' : [1,2]
+#        , 'fs__cv': [cv]
+        },
+
+        {
+#        'clf': [PassiveAggressiveClassifier(**rs, **njobs)],
+        'clf__C' : [1, 0.03, 10, 100, 1000, 10000, 32768],
+        'clf__max_iter' : [1000, 500, 200, 100, 50, 10, 1],
+        'clf__loss' : ['hinge', 'squared_hinge'],
+        'clf__tol' : [1e-4, 1e-5, 1e-2, 1e-1]
+
+        }
+]
+
+#######################################################################
+#========
+# SGD
+# https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/sgd.py
+#========
+
+estimator = SGDClassifier(**rs, **njobs)
+
+# Define pipeline with steps
+pipe_sgd = Pipeline([
+        ('pre', MinMaxScaler())
+        , ('fs', RFECV(DecisionTreeClassifier(**rs), cv = cv, scoring = 'matthews_corrcoef'))
+#        , ('fs', RFECV(estimator, cv = cv, scoring = 'matthews_corrcoef'))
+        , ('clf', estimator)
+        ])
+
+# Define hyperparameter space to search for
+param_grid_sgd = [
+        {
+        'fs__min_features_to_select' : [1,2]
+#        , 'fs__cv': [cv]
+        },
+
+        {
+#        'clf': [SGDClassifier(**rs, **njobs)],
+        'clf__loss': ['hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron'],
+        'clf__penalty':['l1', 'l2', 'elasticnet'],
+        'clf__alpha': [0.0000001, 0.00001, 0.0001, 0.01, 0.1, 1, 10, 100], #autosk learn: 1e-7, 1e-1, log=True, default_value=0.0001
+        'clf__learning_rate': ['constant', 'optimal', 'invscaling', 'adaptive'],
+        'clf__eta0' : [0.0000001, 0.00001, 0.0001, 0.01] # autosklearn 1e-7, 1e-1, default_value=0.01, log=True
+
+        }
+]