optimised run_7030.py to generate output from dict now that the process function and parameter dicts have been added

This commit is contained in:
Tanushree Tunstall 2022-06-24 15:40:18 +01:00
parent 7dc7e25016
commit b37a950fec
12 changed files with 180 additions and 128408 deletions

View file

@ -197,35 +197,35 @@ def MultModelsCl(input_df, target, skf_cv
# Specify multiple Classification Models
#======================================================
models = [('AdaBoost Classifier' , AdaBoostClassifier(**rs) )
# , ('Bagging Classifier' , BaggingClassifier(**rs, **njobs, bootstrap = True, oob_score = True) )
# , ('Decision Tree' , DecisionTreeClassifier(**rs) )
# , ('Extra Tree' , ExtraTreeClassifier(**rs) )
# , ('Extra Trees' , ExtraTreesClassifier(**rs) )
# , ('Gradient Boosting' , GradientBoostingClassifier(**rs) )
# , ('Gaussian NB' , GaussianNB() )
# , ('Gaussian Process' , GaussianProcessClassifier(**rs) )
# , ('K-Nearest Neighbors' , KNeighborsClassifier() )
# , ('LDA' , LinearDiscriminantAnalysis() )
, ('Bagging Classifier' , BaggingClassifier(**rs, **njobs, bootstrap = True, oob_score = True) )
, ('Decision Tree' , DecisionTreeClassifier(**rs) )
, ('Extra Tree' , ExtraTreeClassifier(**rs) )
, ('Extra Trees' , ExtraTreesClassifier(**rs) )
, ('Gradient Boosting' , GradientBoostingClassifier(**rs) )
, ('Gaussian NB' , GaussianNB() )
, ('Gaussian Process' , GaussianProcessClassifier(**rs) )
, ('K-Nearest Neighbors' , KNeighborsClassifier() )
, ('LDA' , LinearDiscriminantAnalysis() )
, ('Logistic Regression' , LogisticRegression(**rs) )
# , ('Logistic RegressionCV' , LogisticRegressionCV(cv = 3, **rs))
# , ('MLP' , MLPClassifier(max_iter = 500, **rs) )
# , ('Multinomial' , MultinomialNB() )
# , ('Naive Bayes' , BernoulliNB() )
# , ('Passive Aggresive' , PassiveAggressiveClassifier(**rs, **njobs) )
# , ('QDA' , QuadraticDiscriminantAnalysis() )
# , ('Random Forest' , RandomForestClassifier(**rs, n_estimators = 1000 ) )
# , ('Random Forest2' , RandomForestClassifier(min_samples_leaf = 5
# , n_estimators = 1000
# , bootstrap = True
# , oob_score = True
# , **njobs
# , **rs
# , max_features = 'auto') )
# , ('Ridge Classifier' , RidgeClassifier(**rs) )
# , ('Ridge ClassifierCV' , RidgeClassifierCV(cv = 3) )
# , ('SVC' , SVC(**rs) )
# , ('Stochastic GDescent' , SGDClassifier(**rs, **njobs) )
# , ('XGBoost' , XGBClassifier(**rs, verbosity = 0, use_label_encoder =False) )
, ('Logistic RegressionCV' , LogisticRegressionCV(cv = 3, **rs))
, ('MLP' , MLPClassifier(max_iter = 500, **rs) )
, ('Multinomial' , MultinomialNB() )
, ('Naive Bayes' , BernoulliNB() )
, ('Passive Aggresive' , PassiveAggressiveClassifier(**rs, **njobs) )
, ('QDA' , QuadraticDiscriminantAnalysis() )
, ('Random Forest' , RandomForestClassifier(**rs, n_estimators = 1000 ) )
, ('Random Forest2' , RandomForestClassifier(min_samples_leaf = 5
, n_estimators = 1000
, bootstrap = True
, oob_score = True
, **njobs
, **rs
, max_features = 'auto') )
, ('Ridge Classifier' , RidgeClassifier(**rs) )
, ('Ridge ClassifierCV' , RidgeClassifierCV(cv = 3) )
, ('SVC' , SVC(**rs) )
, ('Stochastic GDescent' , SGDClassifier(**rs, **njobs) )
, ('XGBoost' , XGBClassifier(**rs, verbosity = 0, use_label_encoder =False) )
]
mm_skf_scoresD = {}
@ -440,10 +440,11 @@ def ProcessMultModelsCl(inputD = {}):
, '\nCV df:', len(scoresDF_CV.columns)
, '\nBT_df:', len(scoresDF_BT.columns)
, '\nmetaDF:', len(metaDF.columns))
if len(scoresDF_CV.columns) == len(scoresDF_BT.columns):
print('\nFirst proceeding to rowbind CV and BT dfs:')
expected_ncols_out = len(scoresDF_BT.columns) + len(metaDF.columns)
print('\nFinal output should have:',expected_ncols_out, 'columns' )
print('\nFinal output should have:', expected_ncols_out, 'columns' )
#-----------------
# Combine WF
@ -496,8 +497,7 @@ def ProcessMultModelsCl(inputD = {}):
sys.exit('\nFIRST IF FAILS')
else:
print('\nConcatenting dfs not possible [WF],check numbers ')
#-------------------------------------
# Combine WF+Metadata: Final output
#-------------------------------------
@ -515,11 +515,15 @@ def ProcessMultModelsCl(inputD = {}):
print('\nPASS: Combined df has expected ncols')
else:
sys.exit('\nFAIL: Length mismatch for combined_df')
print('\nAdding column: Model_name')
combDF['Model_name'] = combDF.index
print('\n========================================================='
, '\nSUCCESS: Ran multiple classifiers'
, '\n=======================================================')
#resampling_methods_wf = combined_baseline_wf[['resampling']]
#resampling_methods_wf = resampling_methods_wf.drop_duplicates()
#, '\n', resampling_methods_wf)