Renamed blind_test_input_df to blind_test_df in MultModelsCl and re-enabled the previously commented-out classifiers in its model list

2022-06-22 16:42:04 +01:00 · 2022-06-22 16:42:04 +01:00 · 0350784d52
commit 0350784d52
parent bc12dbd7c2
114 changed files with 107251 additions and 863011 deletions
--- a/scripts/ml/MultModelsCl.py
+++ b/scripts/ml/MultModelsCl.py
@@ -101,7 +101,7 @@ jacc_score_fn = {'jcc': make_scorer(jaccard_score)}
 #%%
 # Multiple Classification - Model Pipeline
 def MultModelsCl(input_df, target, skf_cv
-                       , blind_test_input_df
+                       , blind_test_df
                       , blind_test_target
                       , add_cm = True # adds confusion matrix based on cross_val_predict
                       , add_yn = True  # adds target var class numbers
@@ -155,32 +155,32 @@ def MultModelsCl(input_df, target, skf_cv
            , ('Logistic RegressionCV'     , LogisticRegressionCV(**rs) )
            , ('Gaussian NB'               , GaussianNB() )
            , ('Naive Bayes'               , BernoulliNB() )
-            # , ('K-Nearest Neighbors'       , KNeighborsClassifier() ) 
-            # , ('SVC'                       , SVC(**rs) ) 
-            # , ('MLP'                       , MLPClassifier(max_iter = 500, **rs) ) 
-            # , ('Decision Tree'             , DecisionTreeClassifier(**rs) ) 
-            # , ('Extra Trees'               , ExtraTreesClassifier(**rs) ) 
-            # , ('Extra Tree'                , ExtraTreeClassifier(**rs) )
-            # , ('Random Forest'             , RandomForestClassifier(**rs, n_estimators = 1000 ) ) 
-            # , ('Random Forest2'            , RandomForestClassifier(min_samples_leaf = 5
-            #                                                         , n_estimators     = 1000
-            #                                                         , bootstrap        = True
-            #                                                         , oob_score        = True
-            #                                                         , **njobs
-            #                                                         , **rs
-            #                                                         , max_features     = 'auto') ) 
-            # , ('XGBoost'                   , XGBClassifier(**rs, verbosity = 0, use_label_encoder =False) )
-            # , ('LDA'                       , LinearDiscriminantAnalysis() )
-            # , ('Multinomial'               , MultinomialNB() )
-            # , ('Passive Aggresive'         , PassiveAggressiveClassifier(**rs, **njobs) )
-            # , ('Stochastic GDescent'       , SGDClassifier(**rs, **njobs) )
-            # , ('AdaBoost Classifier'       , AdaBoostClassifier(**rs) )
-            # , ('Bagging Classifier'        , BaggingClassifier(**rs, **njobs, bootstrap = True, oob_score = True) )
-            # , ('Gaussian Process'          , GaussianProcessClassifier(**rs) )
-            # , ('Gradient Boosting'         , GradientBoostingClassifier(**rs) )
-            # , ('QDA'                       , QuadraticDiscriminantAnalysis() )
-            # , ('Ridge Classifier'          , RidgeClassifier(**rs)  )
-            # , ('Ridge ClassifierCV'        , RidgeClassifierCV(cv = 10) )
+            , ('K-Nearest Neighbors'       , KNeighborsClassifier() ) 
+            , ('SVC'                       , SVC(**rs) ) 
+            , ('MLP'                       , MLPClassifier(max_iter = 500, **rs) ) 
+            , ('Decision Tree'             , DecisionTreeClassifier(**rs) ) 
+            , ('Extra Trees'               , ExtraTreesClassifier(**rs) ) 
+            , ('Extra Tree'                , ExtraTreeClassifier(**rs) )
+            , ('Random Forest'             , RandomForestClassifier(**rs, n_estimators = 1000 ) ) 
+            , ('Random Forest2'            , RandomForestClassifier(min_samples_leaf = 5
+                                                                    , n_estimators     = 1000
+                                                                    , bootstrap        = True
+                                                                    , oob_score        = True
+                                                                    , **njobs
+                                                                    , **rs
+                                                                    , max_features     = 'auto') ) 
+            , ('XGBoost'                   , XGBClassifier(**rs, verbosity = 0, use_label_encoder =False) )
+            , ('LDA'                       , LinearDiscriminantAnalysis() )
+            , ('Multinomial'               , MultinomialNB() )
+            , ('Passive Aggresive'         , PassiveAggressiveClassifier(**rs, **njobs) )
+            , ('Stochastic GDescent'       , SGDClassifier(**rs, **njobs) )
+            , ('AdaBoost Classifier'       , AdaBoostClassifier(**rs) )
+            , ('Bagging Classifier'        , BaggingClassifier(**rs, **njobs, bootstrap = True, oob_score = True) )
+            , ('Gaussian Process'          , GaussianProcessClassifier(**rs) )
+            , ('Gradient Boosting'         , GradientBoostingClassifier(**rs) )
+            , ('QDA'                       , QuadraticDiscriminantAnalysis() )
+            , ('Ridge Classifier'          , RidgeClassifier(**rs)  )
+            , ('Ridge ClassifierCV'        , RidgeClassifierCV(cv = 10) )
            ]
                
    mm_skf_scoresD = {}
@@ -293,9 +293,9 @@ def MultModelsCl(input_df, target, skf_cv
        # Blind test: BTS results
        #=========================
        # Build the final results with all scores for the model
-        #bts_predict = gscv_fs.predict(blind_test_input_df)
+        #bts_predict = gscv_fs.predict(blind_test_df)
        model_pipeline.fit(input_df, target)
-        bts_predict = model_pipeline.predict(blind_test_input_df)
+        bts_predict = model_pipeline.predict(blind_test_df)
        
        bts_mcc_score = round(matthews_corrcoef(blind_test_target, bts_predict),2)
        print('\nMCC on Blind test:'     , bts_mcc_score)