#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Display names and pre-configured scikit-learn classifier instances.

Defines two module-level lists, `names` and `classifiers`, intended to be
used together when running several baseline classifiers on one dataset.

Created on Fri Mar 18 09:47:48 2022

@author: tanu
"""

# These imports are required: every constructor used in `classifiers` below
# comes from scikit-learn, and without them the module fails with NameError.
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

#%% Useful links
# https://stackoverflow.com/questions/41844311/list-of-all-classification-algorithms
# https://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html
# https://github.com/davidsbatista/machine-learning-notebooks/blob/master/hyperparameter-across-models.ipynb
# https://scikit-learn.org/stable/modules/svm.html#classification
# https://machinelearningmastery.com/hyperparameters-for-classification-machine-learning-algorithms/ # [params]
# https://uk.mathworks.com/help/stats/hyperparameter-optimization-in-classification-learner-app.html [algo]
# As a general rule of thumb, it is required to run baseline models on the
# dataset. I know H2O-AutoML and other AutoML packages do this, but I want to
# try using a scikit-learn Pipeline:
# https://codereview.stackexchange.com/questions/256934/model-pipeline-to-run-multiple-classifiers-for-ml-classification

#%% Classifier names and instances
# Human-readable labels. Entries are position-matched with `classifiers`
# below (names[i] labels classifiers[i]), so keep both lists in the same order
# and the same length when editing either one.
names = [
    "Nearest Neighbors",
    "Linear SVM",
    "RBF SVM",
    "Gaussian Process",
    "Decision Tree",
    "Random Forest",
    "Neural Net",
    "AdaBoost",
    "Naive Bayes",
    "QDA",
]

# Unfitted estimator instances with fixed hyperparameters, one per entry in
# `names`. SVC(gamma=2, C=1) relies on SVC's default kernel for the "RBF SVM"
# entry.
classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    GaussianProcessClassifier(1.0 * RBF(1.0)),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1, max_iter=1000),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis(),
]