Added an option to include the confusion matrix and target numbers in the mult function

2022-06-20 17:08:22 +01:00 · 2022-06-20 17:08:22 +01:00 · 135efcee41
commit 135efcee41
parent 905327bf4e
3 changed files with 144 additions and 140 deletions
--- a/scripts/ml/MultModelsCl.py
+++ b/scripts/ml/MultModelsCl.py
@@ -137,95 +137,76 @@ def MultModelsCl(input_df, target, skf_cv
    col_transform = ColumnTransformer(transformers = t
                                       , remainder='passthrough')
    
-    # Specify multiple Classification models
-    lr      = LogisticRegression(**rs)
-    lrcv    = LogisticRegressionCV(**rs)
-    gnb     = GaussianNB()
-    nb      = BernoulliNB()
-    knn     = KNeighborsClassifier()
-    svc     = SVC(**rs)
-    mlp     = MLPClassifier(max_iter = 500, **rs)
-    dt      = DecisionTreeClassifier(**rs)
-    ets     = ExtraTreesClassifier(**rs)
-    et      = ExtraTreeClassifier(**rs)  
-    rf      = RandomForestClassifier(**rs, n_estimators = 1000 )
-    rf2     = RandomForestClassifier(
-                          min_samples_leaf = 5
-                          , n_estimators     = 1000
-                          , bootstrap        = True
-                          , oob_score        = True
-                          , **njobs
-                          , **rs
-                          , max_features     = 'auto')
-    xgb = XGBClassifier(**rs, verbosity = 0, use_label_encoder =False)
-                        
-    lda = LinearDiscriminantAnalysis()
-    
-    mnb = MultinomialNB()
-    
-    pa  = PassiveAggressiveClassifier(**rs, **njobs)
-    
-    sgd = SGDClassifier(**rs, **njobs)  
-
-    abc = AdaBoostClassifier(**rs)
-    bc  = BaggingClassifier(**rs, **njobs, bootstrap = True, oob_score = True)
-    gpc = GaussianProcessClassifier(**rs)
-    gbc = GradientBoostingClassifier(**rs)
-    qda = QuadraticDiscriminantAnalysis()
-    rc  = RidgeClassifier(**rs)
-    rccv  = RidgeClassifierCV(cv = 10)
-    
-    models = [('Logistic Regression'       , lr)
-            , ('Logistic RegressionCV'     , lrcv)
-            , ('Gaussian NB'               , gnb)
-            , ('Naive Bayes'               , nb)
-            , ('K-Nearest Neighbors'       , knn) 
-            , ('SVC'                       , svc) 
-            , ('MLP'                       , mlp) 
-            , ('Decision Tree'             , dt) 
-            , ('Extra Trees'               , ets) 
-            , ('Extra Tree'                , et)
-            , ('Random Forest'             , rf) 
-            , ('Random Forest2'            , rf2) 
-            , ('XGBoost'                   , xgb)
-            , ('LDA'                       , lda)
-            , ('Multinomial'               , mnb)
-            , ('Passive Aggresive'         , pa)
-            , ('Stochastic GDescent'       , sgd)
-            , ('AdaBoost Classifier'       , abc)
-            , ('Bagging Classifier'        , bc)
-            , ('Gaussian Process'          , gpc)
-            , ('Gradient Boosting'         , gbc)
-            , ('QDA'                       , qda)
-            , ('Ridge Classifier'          , rc)
-            , ('Ridge ClassifierCV'        , rccv)
+    # Specify multiple Classification models  
+    models = [('Logistic Regression'       , LogisticRegression(**rs) )
+            , ('Logistic RegressionCV'     , LogisticRegressionCV(**rs) )
+            , ('Gaussian NB'               , GaussianNB() )
+            , ('Naive Bayes'               , BernoulliNB() )
+            , ('K-Nearest Neighbors'       , KNeighborsClassifier() ) 
+            , ('SVC'                       , SVC(**rs) ) 
+            , ('MLP'                       , MLPClassifier(max_iter = 500, **rs) ) 
+            , ('Decision Tree'             , DecisionTreeClassifier(**rs) ) 
+            , ('Extra Trees'               , ExtraTreesClassifier(**rs) ) 
+            , ('Extra Tree'                , ExtraTreeClassifier(**rs) )
+            , ('Random Forest'             , RandomForestClassifier(**rs, n_estimators = 1000 ) ) 
+            , ('Random Forest2'            , RandomForestClassifier(min_samples_leaf = 5
+                                                                    , n_estimators     = 1000
+                                                                    , bootstrap        = True
+                                                                    , oob_score        = True
+                                                                    , **njobs
+                                                                    , **rs
+                                                                    , max_features     = 'auto') ) 
+            , ('XGBoost'                   , XGBClassifier(**rs, verbosity = 0, use_label_encoder =False) )
+            , ('LDA'                       , LinearDiscriminantAnalysis() )
+            , ('Multinomial'               , MultinomialNB() )
+            , ('Passive Aggresive'         , PassiveAggressiveClassifier(**rs, **njobs) )
+            , ('Stochastic GDescent'       , SGDClassifier(**rs, **njobs) )
+            , ('AdaBoost Classifier'       , AdaBoostClassifier(**rs) )
+            , ('Bagging Classifier'        , BaggingClassifier(**rs, **njobs, bootstrap = True, oob_score = True) )
+            , ('Gaussian Process'          , GaussianProcessClassifier(**rs) )
+            , ('Gradient Boosting'         , GradientBoostingClassifier(**rs) )
+            , ('QDA'                       , QuadraticDiscriminantAnalysis() )
+            , ('Ridge Classifier'          , RidgeClassifier(**rs)  )
+            , ('Ridge ClassifierCV'        , RidgeClassifierCV(cv = 10) )
            ]
        
    mm_skf_scoresD = {}
-     
-    for model_name, model_fn in models:
-        print('\nModel_name:', model_name
-        , '\nModel func:'    , model_fn
-        , '\nList of models:', models)
+   
+    print('\n==============================================================\n'
+          , '\nRunning several classification models (n):', len(models)
+          ,'\nList of models:')
+    for m in models:
+        print(m)
+    print('\n================================================================\n')
    
+    index = 1
+    for model_name, model_fn in models:
+        print('\nRunning classifier:', index
+              , '\nModel_name:'               , model_name
+              , '\nModel func:'               , model_fn)
+        index = index+1
+        
        model_pipeline = Pipeline([
            ('prep'     , col_transform)
            , ('model'  , model_fn)])
            
-        print('Running model pipeline:', model_pipeline)
-        skf_cv_mod = cross_validate(model_pipeline
+        print('\nRunning model pipeline:', model_pipeline)
+        skf_cv_modD = cross_validate(model_pipeline
                              , input_df
                              , target
                              , cv = skf_cv
                              , scoring = scoring_fn
-                              , return_train_score = True)
+                              , return_train_score = True) 
+        
+        #==============================
+        # Extract mean values for CV 
+        #==============================
        mm_skf_scoresD[model_name] = {}
-        for key, value in skf_cv_mod.items():
+        
+        for key, value in skf_cv_modD.items():
            print('\nkey:', key, '\nvalue:', value)
            print('\nmean value:', mean(value))
            mm_skf_scoresD[model_name][key] = round(mean(value),2)
-            #pp.pprint(mm_skf_scoresD)
-            #cvtrain_mcc = mm_skf_scoresD[model_name]['test_mcc']
            
    #return(mm_skf_scoresD)
 #%%