added all autosklearn informed hyperparams

2022-05-26 04:51:47 +01:00 · 2022-05-26 04:51:47 +01:00 · 8f8306d948
commit 8f8306d948
parent b5d29dd449
2 changed files with 141 additions and 7 deletions
--- a/MultClassPipe3.py
+++ b/MultClassPipe3.py
@ -126,14 +126,21 @@ def MultClassPipeSKFCV(input_df, target, skf_cv, var_type = ['numerical', 'categ
                                    n_estimators     = 1000 )
    rf2     = RandomForestClassifier(
                          min_samples_leaf = 5
-                          , n_estimators     = 1000
+                          , n_estimators     = 100 #10
                          , bootstrap        = True
                          , oob_score        = True
                          , **njobs
                          , **rs
                          , max_features     = 'auto')
-    xgb = XGBClassifier(**rs
-                        , verbosity = 0, use_label_encoder =False)
+    xgb = XGBClassifier(**rs, verbosity = 0, use_label_encoder =False)
+                        
+    lda = LinearDiscriminantAnalysis()
+    
+    mnb = MultinomialNB(**rs)
+    
+    pa  = PassiveAggressiveClassifier(**rs, **njobs)
+    
+    sgd = SGDClassifier(**rs, **njobs)          

    models = [('Logistic Regression', log_reg)
            , ('Naive Bayes'        , nb)
@ -145,7 +152,11 @@ def MultClassPipeSKFCV(input_df, target, skf_cv, var_type = ['numerical', 'categ
            , ('Random Forest'      , rf) 
            , ('Naive Bayes'        , nb)
            , ('Random Forest2'     , rf2) 
-            , ('XGBoost'            , xgb)]
+            , ('XGBoost'            , xgb)
+            , ('LDA'                , lda)
+            , ('MultinomialNB'      , mnb)
+            , ('PassiveAggresive'   , pa)
+            , ('StochasticGDescent' , sgd)]
        
    mm_skf_scoresD = {}
     
--- a/classification_params_FS.py
+++ b/classification_params_FS.py
@ -7,8 +7,6 @@
 # https://github.com/automl/auto-sklearn/tree/master/autosklearn/pipeline/components/classification

 # TOADD: 
-# Extra Trees
-https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/extra_trees.py
 # LDA
 https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/lda.py
 # Multinomial_nb
@ -565,13 +563,138 @@ param_grid_svc = [
 #        'clf': [SVC(**rs)],    
 #        'clf__kernel': ['poly', 'rbf', 'sigmoid']
       , 'clf__kernel': ['rbf']
-       , 'clf__C'     : [50, 10, 1.0, 0.1, 0.01]
+#       , 'clf__C'     : [50, 10, 1.0, 0.1, 0.01]
       , 'clf__C'     : [1, 0.03, 10, 100, 1000, 10000, 32768]
       , 'clf__gamma' : ['scale', 'auto'] 
        
        }
 ]
+#######################################################################
+#######################################################################
+#========
+# LDA
+# https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/lda.py
+#========
+
+# LDA instance used as the final 'clf' step of pipe_lda; the 'clf__*' keys
+# in param_grid_lda below are applied to this object.
+estimator =  LinearDiscriminantAnalysis()
+
+# Define pipeline with steps: scaling -> feature selection (RFECV) -> classifier
+pipe_lda = Pipeline([
+    ('pre', MinMaxScaler())
+    , ('fs', RFECV(DecisionTreeClassifier(**rs), cv = cv, scoring = 'matthews_corrcoef'))
+#    , ('fs', RFECV(estimator, cv = cv, scoring = 'matthews_corrcoef'))
+    , ('clf',  estimator)
+    ])
+
+# Define hyperparameter space to search for.
+# This is a list of dicts: each dict is explored as its own sub-grid.
+param_grid_lda = [
+    {
+    'fs__min_features_to_select' : [1,2]
+#     , 'fs__cv': [cv]
+     },
+
+    # The 'svd' solver does not support shrinkage, so it gets its own
+    # sub-grid with shrinkage left at None; crossing it with the other
+    # shrinkage values only produces candidates that fail during fit.
+    {
+#        'clf': [LinearDiscriminantAnalysis()],
+       'clf__solver'    : ['svd'],
+       'clf__shrinkage' : [None],
+        },
+
+    # Shrinkage is only meaningful for the 'lsqr' and 'eigen' solvers.
+    {
+       'clf__solver'    : ['lsqr', 'eigen'],
+       'clf__shrinkage' : [None, 'auto', 0, 0.5, 1],
+        }
+]

 #######################################################################
+#========
+# Multinomial_nb
+# https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/multinomial_nb.py
+#========
+
+# MultinomialNB is constructed without **rs: it does not accept the keyword
+# arguments that rs supplies to the other estimators, so passing them raises
+# a TypeError at import time.
+# NOTE(review): rs is defined outside this hunk; presumably it carries
+# random_state -- confirm.
+estimator =  MultinomialNB()
+
+# Define pipeline with steps: scaling -> feature selection (RFECV) -> classifier
+pipe_mnb = Pipeline([
+    ('pre', MinMaxScaler())
+    # **rs passed to the RFECV tree for consistency with the other pipelines
+    , ('fs', RFECV(DecisionTreeClassifier(**rs), cv = cv, scoring = 'matthews_corrcoef'))
+#    , ('fs', RFECV(estimator, cv = cv, scoring = 'matthews_corrcoef'))
+    , ('clf',  estimator)
+    ])
+
+# Define hyperparameter space to search for.
+# This is a list of dicts: each dict is explored as its own sub-grid.
+param_grid_mnb = [
+    {
+    'fs__min_features_to_select' : [1,2]
+#     , 'fs__cv': [cv]
+     },
+             
+    {
+#      'clf': [MultinomialNB()],    
+       'clf__alpha': [0.01, 0.1, 1, 20, 25, 50, 55, 100]

        
+        }
+]
+#######################################################################
+#========
+# passive_aggressive
+# https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/passive_aggressive.py
+#========
+
+# PassiveAggressiveClassifier instance used as the final 'clf' step of
+# pipe_pa; the 'clf__*' keys in param_grid_pa below are applied to it.
+estimator =  PassiveAggressiveClassifier(**rs, **njobs)
+
+# Define pipeline with steps: scaling -> feature selection (RFECV) -> classifier
+pipe_pa = Pipeline([
+    ('pre', MinMaxScaler())
+    , ('fs', RFECV(DecisionTreeClassifier(**rs), cv = cv, scoring = 'matthews_corrcoef'))
+#    , ('fs', RFECV(estimator, cv = cv, scoring = 'matthews_corrcoef'))
+    , ('clf',  estimator)
+    ])
+
+# Define hyperparameter space to search for.
+# This is a list of dicts: each dict is explored as its own sub-grid.
+# Keys must be '<step name>__<parameter name>' (double underscore) and the
+# parameter name must match the estimator's argument exactly; a misspelled
+# key makes the search fail with an invalid-parameter error.
+param_grid_pa = [
+    {
+    'fs__min_features_to_select' : [1,2]
+#     , 'fs__cv': [cv]
+     },
+             
+    {
+#        'clf': [PassiveAggressiveClassifier(**rs, **njobs)],    
+       'clf__C'         : [1, 0.03, 10, 100, 1000, 10000, 32768], 
+       'clf__max_iter'  : [1000, 500, 200, 100, 50, 10, 1],
+       'clf__loss'      : ['hinge', 'squared_hinge'],
+       'clf__tol'       : [1e-4, 1e-5, 1e-2, 1e-1]
+        
+        }
+]
+
+#######################################################################
+#========
+# SGD
+# https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/sgd.py
+#========
+
+# SGDClassifier instance used as the final 'clf' step of pipe_sgd; the
+# 'clf__*' keys in param_grid_sgd below are applied to it.
+estimator =  SGDClassifier(**rs, **njobs)
+
+# Define pipeline with steps: scaling -> feature selection (RFECV) -> classifier
+pipe_sgd = Pipeline([
+    ('pre', MinMaxScaler())
+    , ('fs', RFECV(DecisionTreeClassifier(**rs), cv = cv, scoring = 'matthews_corrcoef'))
+#    , ('fs', RFECV(estimator, cv = cv, scoring = 'matthews_corrcoef'))
+    , ('clf',  estimator)
+    ])
+
+# Define hyperparameter space to search for.
+# This is a list of dicts: each dict is explored as its own sub-grid.
+param_grid_sgd = [
+    {
+    'fs__min_features_to_select' : [1,2]
+#     , 'fs__cv': [cv]
+     },
+             
+    {
+#        'clf': [SGDClassifier(**rs, **njobs)],    
+        # NOTE(review): newer scikit-learn releases rename the 'log' loss to
+        # 'log_loss' -- confirm against the installed version.
+        'clf__loss': ['hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron'], 
+        'clf__penalty':['l1', 'l2', 'elasticnet'], 
+        'clf__alpha': [0.0000001, 0.00001, 0.0001, 0.01, 0.1, 1, 10, 100], #autosk learn: 1e-7, 1e-1, log=True, default_value=0.0001
+        'clf__learning_rate': ['constant', 'optimal', 'invscaling', 'adaptive'],
+        'clf__eta0' : [0.0000001, 0.00001, 0.0001, 0.01] # autosklearn 1e-7, 1e-1, default_value=0.01, log=True
+        
+        }
+]
+