tried pca
This commit is contained in:
parent
8d831f3613
commit
a15d801c2a
2 changed files with 35 additions and 28 deletions
|
@ -74,6 +74,7 @@ from sklearn.impute import KNNImputer as KNN
|
||||||
import json
|
import json
|
||||||
import argparse
|
import argparse
|
||||||
import re
|
import re
|
||||||
|
from sklearn.decomposition import PCA
|
||||||
#%% GLOBALS
|
#%% GLOBALS
|
||||||
rs = {'random_state': 42}
|
rs = {'random_state': 42}
|
||||||
njobs = {'n_jobs': os.cpu_count() } # the number of jobs should equal the number of CPU cores
|
njobs = {'n_jobs': os.cpu_count() } # the number of jobs should equal the number of CPU cores
|
||||||
|
@ -232,33 +233,33 @@ def MultModelsCl(input_df, target
|
||||||
models = [('AdaBoost Classifier' , AdaBoostClassifier(**rs) )
|
models = [('AdaBoost Classifier' , AdaBoostClassifier(**rs) )
|
||||||
, ('Bagging Classifier' , BaggingClassifier(**rs, **njobs, bootstrap = True, oob_score = True, verbose = 3, n_estimators = 100) )
|
, ('Bagging Classifier' , BaggingClassifier(**rs, **njobs, bootstrap = True, oob_score = True, verbose = 3, n_estimators = 100) )
|
||||||
, ('Decision Tree' , DecisionTreeClassifier(**rs) )
|
, ('Decision Tree' , DecisionTreeClassifier(**rs) )
|
||||||
, ('Extra Tree' , ExtraTreeClassifier(**rs) )
|
, ('Extra Tree' , ExtraTreeClassifier(**rs) )
|
||||||
, ('Extra Trees' , ExtraTreesClassifier(**rs) )
|
, ('Extra Trees' , ExtraTreesClassifier(**rs) )
|
||||||
, ('Gradient Boosting' , GradientBoostingClassifier(**rs) )
|
, ('Gradient Boosting' , GradientBoostingClassifier(**rs) )
|
||||||
, ('Gaussian NB' , GaussianNB() )
|
, ('Gaussian NB' , GaussianNB() )
|
||||||
, ('Gaussian Process' , GaussianProcessClassifier(**rs) )
|
, ('Gaussian Process' , GaussianProcessClassifier(**rs) )
|
||||||
, ('K-Nearest Neighbors' , KNeighborsClassifier() )
|
, ('K-Nearest Neighbors' , KNeighborsClassifier() )
|
||||||
, ('LDA' , LinearDiscriminantAnalysis() )
|
, ('LDA' , LinearDiscriminantAnalysis() )
|
||||||
, ('Logistic Regression' , LogisticRegression(**rs) )
|
, ('Logistic Regression' , LogisticRegression(**rs) )
|
||||||
, ('Logistic RegressionCV' , LogisticRegressionCV(cv = 3, **rs))
|
, ('Logistic RegressionCV' , LogisticRegressionCV(cv = 3, **rs))
|
||||||
, ('MLP' , MLPClassifier(max_iter = 500, **rs) )
|
, ('MLP' , MLPClassifier(max_iter = 500, **rs) )
|
||||||
#, ('Multinomial' , MultinomialNB() )
|
#, ('Multinomial' , MultinomialNB() )
|
||||||
, ('Naive Bayes' , BernoulliNB() )
|
, ('Naive Bayes' , BernoulliNB() )
|
||||||
, ('Passive Aggresive' , PassiveAggressiveClassifier(**rs, **njobs) )
|
, ('Passive Aggresive' , PassiveAggressiveClassifier(**rs, **njobs) )
|
||||||
, ('QDA' , QuadraticDiscriminantAnalysis() )
|
, ('QDA' , QuadraticDiscriminantAnalysis() )
|
||||||
, ('Random Forest' , RandomForestClassifier(**rs, n_estimators = 1000, **njobs ) )
|
, ('Random Forest' , RandomForestClassifier(**rs, n_estimators = 1000, **njobs ) )
|
||||||
# , ('Random Forest2' , RandomForestClassifier(min_samples_leaf = 5
|
# , ('Random Forest2' , RandomForestClassifier(min_samples_leaf = 5
|
||||||
# , n_estimators = 1000
|
# , n_estimators = 1000
|
||||||
# , bootstrap = True
|
# , bootstrap = True
|
||||||
# , oob_score = True
|
# , oob_score = True
|
||||||
# , **njobs
|
# , **njobs
|
||||||
# , **rs
|
# , **rs
|
||||||
# , max_features = 'auto') )
|
# , max_features = 'auto') )
|
||||||
, ('Ridge Classifier' , RidgeClassifier(**rs) )
|
, ('Ridge Classifier' , RidgeClassifier(**rs) )
|
||||||
, ('Ridge ClassifierCV' , RidgeClassifierCV(cv = 3) )
|
, ('Ridge ClassifierCV' , RidgeClassifierCV(cv = 3) )
|
||||||
, ('SVC' , SVC(**rs) )
|
, ('SVC' , SVC(**rs) )
|
||||||
, ('Stochastic GDescent' , SGDClassifier(**rs, **njobs) )
|
, ('Stochastic GDescent' , SGDClassifier(**rs, **njobs) )
|
||||||
, ('XGBoost' , XGBClassifier(**rs, verbosity = 0, use_label_encoder =False, **njobs) )
|
, ('XGBoost' , XGBClassifier(**rs, verbosity = 0, use_label_encoder =False, **njobs) )
|
||||||
]
|
]
|
||||||
|
|
||||||
mm_skf_scoresD = {}
|
mm_skf_scoresD = {}
|
||||||
|
@ -280,6 +281,12 @@ def MultModelsCl(input_df, target
|
||||||
model_pipeline = Pipeline([
|
model_pipeline = Pipeline([
|
||||||
('prep' , col_transform)
|
('prep' , col_transform)
|
||||||
, ('model' , model_fn)])
|
, ('model' , model_fn)])
|
||||||
|
|
||||||
|
# model_pipeline = Pipeline([
|
||||||
|
# ('prep' , col_transform)
|
||||||
|
# , ('pca' , PCA(n_components = 2))
|
||||||
|
# , ('model' , model_fn)])
|
||||||
|
|
||||||
|
|
||||||
print('\nRunning model pipeline:', model_pipeline)
|
print('\nRunning model pipeline:', model_pipeline)
|
||||||
skf_cv_modD = cross_validate(model_pipeline
|
skf_cv_modD = cross_validate(model_pipeline
|
||||||
|
|
|
@ -82,7 +82,7 @@ fooD = MultModelsCl(input_df = df2['X']
|
||||||
, tts_split_type = spl_type
|
, tts_split_type = spl_type
|
||||||
, resampling_type = 'none' # default
|
, resampling_type = 'none' # default
|
||||||
, var_type = ['mixed']
|
, var_type = ['mixed']
|
||||||
, scale_numeric = ['min_max_neg']
|
, scale_numeric = ['min_max']
|
||||||
, return_formatted_output = False
|
, return_formatted_output = False
|
||||||
|
|
||||||
)
|
)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue