Moved the logo_skf function to del, as MultClfs is now used for combined data

2022-07-28 12:24:24 +01:00 · 2022-07-28 12:24:24 +01:00 · 2c50124b1b
commit 2c50124b1b
parent a6532ddfa3
8 changed files with 71 additions and 1735 deletions
--- a/scripts/ml/ml_functions/MultClfs.py
+++ b/scripts/ml/ml_functions/MultClfs.py
@@ -92,10 +92,10 @@ scoring_fn =  ({ 'mcc'        : make_scorer(matthews_corrcoef)
                , 'roc_auc'   : make_scorer(roc_auc_score)
                , 'jcc'       : make_scorer(jaccard_score)
            }) 
-  
+# sel_cv is now specified inside the function call
 #skf_cv = StratifiedKFold(n_splits = 10
 #                          #, shuffle = False, random_state= None)
-#                           , shuffle = True,**rs)
+#                           , shuffle = True, **rs)

 #rskf_cv = RepeatedStratifiedKFold(n_splits = 10
 #                                  , n_repeats = 3
@@ -149,25 +149,26 @@ scoreBT_mapD = {'bts_mcc'          : 'MCC'
 # Run Multiple Classifiers
 ############################
 # Multiple Classification - Model Pipeline
-def MultModelsCl(input_df, target
-                       , sel_cv
-                       , tts_split_type
-                       , resampling_type
-                       #, group = None
-                       
-                       , add_cm = True # adds confusion matrix based on cross_val_predict
-                       , add_yn = True  # adds target var class numbers
-                       , var_type = ['numerical', 'categorical','mixed']
-                       , scale_numeric = ['min_max', 'std', 'min_max_neg', 'none'] 
+def MultModelsCl(input_df
+                 , target
+                 , sel_cv
+                 , tts_split_type
+                 , resampling_type
+                 #, group = None
+                
+                 , add_cm = True # adds confusion matrix based on cross_val_predict
+                 , add_yn = True  # adds target var class numbers
+                 , var_type = ['numerical', 'categorical','mixed']
+                 , scale_numeric = ['min_max', 'std', 'min_max_neg', 'none'] 

-                       , run_blind_test = True
-                       , blind_test_df = pd.DataFrame()
-                       , blind_test_target = pd.Series(dtype = int)
-                       , return_formatted_output = True
+                 , run_blind_test = True
+                 , blind_test_df = pd.DataFrame()
+                 , blind_test_target = pd.Series(dtype = int)
+                 , return_formatted_output = True

-                       , random_state = 42
-                       , n_jobs = os.cpu_count() # the number of jobs should equal the number of CPU cores
-                       ):
+                 , random_state = 42
+                 , n_jobs = os.cpu_count() # the number of jobs should equal the number of CPU cores
+                 ):

    '''
    @ param input_df: input features 
@@ -357,10 +358,9 @@ def MultModelsCl(input_df, target
           y_pred   = cross_val_predict(model_pipeline
                                        , input_df
                                        , target
-                                        #, commented out thing,
-                                        , cv=sel_cv
-                                        , **njobs
-                                        )
+                                        , cv = sel_cv
+                                        #, groups = group
+                                        , **njobs)
            #_tn, _fp, _fn, _tp = confusion_matrix(y_pred, y).ravel() # internally
           tn, fp, fn, tp = confusion_matrix(y_pred, target).ravel()