#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 18 09:47:48 2022

@author: tanu
"""

#%% Useful links
# https://stackoverflow.com/questions/41844311/list-of-all-classification-algorithms
# https://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html
# https://github.com/davidsbatista/machine-learning-notebooks/blob/master/hyperparameter-across-models.ipynb
# https://scikit-learn.org/stable/modules/svm.html#classification
# https://machinelearningmastery.com/hyperparameters-for-classification-machine-learning-algorithms/ # [params]
# https://uk.mathworks.com/help/stats/hyperparameter-optimization-in-classification-learner-app.html # [algo]

# As a general rule of thumb, baseline models should be run on the dataset first.
# AutoML packages such as H2O AutoML do this automatically, but here the aim is
# to do it with a scikit-learn Pipeline.
# https://codereview.stackexchange.com/questions/256934/model-pipeline-to-run-multiple-classifiers-for-ml-classification

#%% Imports
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

names = [
    "Nearest Neighbors",
    "Linear SVM",
    "RBF SVM",
    "Gaussian Process",
    "Decision Tree",
    "Random Forest",
    "Neural Net",
    "AdaBoost",
    "Naive Bayes",
    "QDA",
]

classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    GaussianProcessClassifier(1.0 * RBF(1.0)),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1, max_iter=1000),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis(),
]
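
#%% Baseline model sketch
# A minimal sketch of one way to run these classifiers as baselines with a
# scikit-learn Pipeline, as the comments above suggest: each estimator is
# paired with a StandardScaler and scored with 5-fold cross-validation.
# The make_classification data below is only a synthetic placeholder;
# substitute the real feature matrix and target when using this.
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

X, y = make_classification(n_samples=200, n_features=20, random_state=42)

for name, clf in zip(names, classifiers):
    # Scale features, then fit the classifier inside one pipeline object
    pipe = Pipeline([("scaler", StandardScaler()), ("classifier", clf)])
    scores = cross_val_score(pipe, X, y, cv=5, scoring="accuracy")
    print(f"{name}: mean accuracy = {scores.mean():.3f} (+/- {scores.std():.3f})")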