added all autosklearn informed hyperparams

2022-05-26 04:51:47 +01:00 · 2022-05-26 04:51:47 +01:00 · 8f8306d948
commit 8f8306d948
parent b5d29dd449
2 changed files with 141 additions and 7 deletions
--- a/MultClassPipe3.py
+++ b/MultClassPipe3.py
@ -126,14 +126,21 @@ def MultClassPipeSKFCV(input_df, target, skf_cv, var_type = ['numerical', 'categ
                                    n_estimators     = 1000 )
    rf2     = RandomForestClassifier(
                          min_samples_leaf = 5
-                          , n_estimators     = 1000
+                          , n_estimators     = 100 #10
                          , bootstrap        = True
                          , oob_score        = True
                          , **njobs
                          , **rs
                          , max_features     = 'auto')
-    xgb = XGBClassifier(**rs
+    xgb = XGBClassifier(**rs, verbosity = 0, use_label_encoder =False)
-                        , verbosity = 0, use_label_encoder =False)
+                        
    lda = LinearDiscriminantAnalysis()
    mnb = MultinomialNB(**rs)
    pa  = PassiveAggressiveClassifier(**rs, **njobs)
    sgd = SGDClassifier(**rs, **njobs)          
    models = [('Logistic Regression', log_reg)
            , ('Naive Bayes'        , nb)
@ -145,7 +152,11 @@ def MultClassPipeSKFCV(input_df, target, skf_cv, var_type = ['numerical', 'categ
            , ('Random Forest'      , rf) 
            , ('Naive Bayes'        , nb)
            , ('Random Forest2'     , rf2) 
-            , ('XGBoost'            , xgb)]
+            , ('XGBoost'            , xgb)
            , ('LDA'                , lda)
            , ('MultinomialNB'      , mnb)
            , ('PassiveAggresive'   , pa)
            , ('StochasticGDescent' , sgd)]
    mm_skf_scoresD = {}
--- a/classification_params_FS.py
+++ b/classification_params_FS.py
@ -7,8 +7,6 @@
 # https://github.com/automl/auto-sklearn/tree/master/autosklearn/pipeline/components/classification
 # TOADD: 
 # Extra Trees
 https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/extra_trees.py
 # LDA
 https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/lda.py
 # Multinomial_nb
@ -565,13 +563,138 @@ param_grid_svc = [
 #        'clf': [SVC(**rs)],    
 #        'clf__kernel': ['poly', 'rbf', 'sigmoid']
       , 'clf__kernel': ['rbf']
-       , 'clf__C'     : [50, 10, 1.0, 0.1, 0.01]
+#       , 'clf__C'     : [50, 10, 1.0, 0.1, 0.01]
       , 'clf__C'     : [1, 0.03, 10, 100, 1000, 10000, 32768]
       , 'clf__gamma' : ['scale', 'auto'] 
        }
 ]
 #######################################################################
 #######################################################################
 #========
 # LDA
 # https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/lda.py
 #========
 # LDA classifier used as the final ('clf') step of the pipeline below.
 estimator = LinearDiscriminantAnalysis()
 # Pipeline: min-max scaling -> cross-validated recursive feature
 # elimination -> classifier. Features are ranked with a decision tree;
 # using `estimator` itself as the RFECV base model is the alternative.
 pipe_lda = Pipeline(steps=[
     ('pre', MinMaxScaler()),
     ('fs', RFECV(estimator=DecisionTreeClassifier(**rs),
                  cv=cv,
                  scoring='matthews_corrcoef')),
     ('clf', estimator),
 ])
 # Hyperparameter space to search for the LDA pipeline.
 # Each dict is a separate grid (assuming this list is handed to a
 # GridSearchCV-style search, which explores list entries independently).
 param_grid_lda = [
     # Grid 1: feature-selection step only.
     {
         'fs__min_features_to_select': [1, 2],
     },
     # Grid 2: the 'svd' solver does not support shrinkage, so it is searched
     # only with shrinkage=None; crossing it with the other shrinkage values
     # just produces failed fits.
     {
         'clf__solver': ['svd'],
         'clf__shrinkage': [None],
     },
     # Grid 3: solvers that accept shrinkage ('auto' or a float in [0, 1]).
     {
         'clf__solver': ['lsqr', 'eigen'],
         'clf__shrinkage': [None, 'auto', 0, 0.5, 1],
     },
 ]
 #######################################################################
 #========
 # Multinomial_nb
 # https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/multinomial_nb.py
 #========
 # MultinomialNB has no random_state parameter, so `rs` is not unpacked into
 # it (presumably `rs` carries random_state, as it is passed to the tree and
 # SGD estimators in this file -- confirm against its definition).
 estimator = MultinomialNB()
 # Define pipeline with steps: min-max scaling -> cross-validated recursive
 # feature elimination -> classifier.
 pipe_mnb = Pipeline([
     ('pre', MinMaxScaler()),
     # Seed the ranking tree with `rs`, consistent with the other pipelines
     # in this file, so feature selection is reproducible.
     ('fs', RFECV(DecisionTreeClassifier(**rs), cv=cv, scoring='matthews_corrcoef')),
     # Alternative considered: RFECV(estimator, cv=cv, scoring='matthews_corrcoef')
     ('clf', estimator),
 ])
 # Hyperparameter space to search for the MultinomialNB pipeline.
 param_grid_mnb = [
     # Feature-selection step ('fs') settings.
     dict(fs__min_features_to_select=[1, 2]),
     # Classifier step ('clf') settings: smoothing strength.
     dict(clf__alpha=[0.01, 0.1, 1, 20, 25, 50, 55, 100]),
 ]
 #######################################################################
 #========
 # passive_aggressive
 # https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/passive_aggressive.py
 #========
 # Passive-aggressive classifier used as the final ('clf') step below.
 estimator = PassiveAggressiveClassifier(**rs, **njobs)
 # Pipeline: min-max scaling -> cross-validated recursive feature
 # elimination -> classifier. Features are ranked with a decision tree;
 # using `estimator` itself as the RFECV base model is the alternative.
 pipe_pa = Pipeline(steps=[
     ('pre', MinMaxScaler()),
     ('fs', RFECV(estimator=DecisionTreeClassifier(**rs),
                  cv=cv,
                  scoring='matthews_corrcoef')),
     ('clf', estimator),
 ])
 # Hyperparameter space to search for the passive-aggressive pipeline.
 # Keys follow the '<step>__<param>' convention (double underscore) so they
 # resolve against the 'fs' and 'clf' pipeline steps.
 param_grid_pa = [
     # Feature-selection step settings.
     {
         'fs__min_features_to_select': [1, 2],
     },
     # Classifier step settings.
     {
         'clf__C': [1, 0.03, 10, 100, 1000, 10000, 32768],
         # was misspelled 'clf__maxt_iter', which is not an estimator parameter
         'clf__max_iter': [1000, 500, 200, 100, 50, 10, 1],
         'clf__loss': ['hinge', 'squared_hinge'],
         # was 'clf_tol' (single underscore), which does not address the 'clf' step
         'clf__tol': [1e-4, 1e-5, 1e-2, 1e-1],
     },
 ]
 #######################################################################
 #========
 # SGD
 # https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/sgd.py
 #========
 # SGD classifier used as the final ('clf') step of the pipeline below.
 estimator = SGDClassifier(**rs, **njobs)
 # Pipeline: min-max scaling -> cross-validated recursive feature
 # elimination -> classifier. Features are ranked with a decision tree;
 # using `estimator` itself as the RFECV base model is the alternative.
 pipe_sgd = Pipeline(steps=[
     ('pre', MinMaxScaler()),
     ('fs', RFECV(estimator=DecisionTreeClassifier(**rs),
                  cv=cv,
                  scoring='matthews_corrcoef')),
     ('clf', estimator),
 ])
 # Hyperparameter space to search for the SGD pipeline.
 # Value ranges follow the auto-sklearn SGD component referenced above.
 param_grid_sgd = [
     # Feature-selection step settings.
     {
         'fs__min_features_to_select': [1, 2],
     },
     # Classifier step settings.
     {
         # Fixed: the original line read "'clf__loss': = [...]", a syntax error.
         # NOTE(review): newer scikit-learn releases rename 'log' to
         # 'log_loss' -- confirm against the installed version.
         'clf__loss': ['hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron'],
         'clf__penalty': ['l1', 'l2', 'elasticnet'],
         # auto-sklearn: 1e-7 .. 1e-1, log scale, default 0.0001
         'clf__alpha': [1e-7, 1e-5, 1e-4, 0.01, 0.1, 1, 10, 100],
         'clf__learning_rate': ['constant', 'optimal', 'invscaling', 'adaptive'],
         # auto-sklearn: 1e-7 .. 1e-1, log scale, default 0.01
         'clf__eta0': [1e-7, 1e-5, 1e-4, 0.01],
     },
 ]