Tried PCA: import PCA, add a commented-out PCA pipeline step, switch test scaling to min_max

2022-07-05 23:05:37 +01:00 · 2022-07-05 23:05:37 +01:00 · a15d801c2a
commit a15d801c2a
parent 8d831f3613
2 changed files with 35 additions and 28 deletions
--- a/scripts/ml/ml_functions/MultClfs.py
+++ b/scripts/ml/ml_functions/MultClfs.py
@@ -74,6 +74,7 @@ from sklearn.impute import KNNImputer as KNN
 import json
 import argparse
 import re
+from sklearn.decomposition import PCA
 #%% GLOBALS
 rs = {'random_state': 42}
 njobs = {'n_jobs': os.cpu_count() } # the number of jobs should equal the number of CPU cores
@@ -281,6 +282,12 @@ def MultModelsCl(input_df, target
            ('prep'     , col_transform)
            , ('model'  , model_fn)])
        
+        # model_pipeline = Pipeline([
+        #     ('prep'     , col_transform)
+        #     ,  ('pca'   , PCA(n_components = 2))
+        #     , ('model'  , model_fn)])
+            
+            
        print('\nRunning model pipeline:', model_pipeline)
        skf_cv_modD = cross_validate(model_pipeline
                              , input_df
--- a/scripts/ml/ml_functions/test_func_singlegene.py
+++ b/scripts/ml/ml_functions/test_func_singlegene.py
@@ -82,7 +82,7 @@ fooD = MultModelsCl(input_df = df2['X']
                , tts_split_type  = spl_type
                , resampling_type = 'none' # default
                , var_type = ['mixed']
-                , scale_numeric = ['min_max_neg']
+                , scale_numeric = ['min_max']
                , return_formatted_output = False

                )