#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 18 09:47:48 2022

@author: tanu

Display names and pre-configured estimator instances for a set of
baseline classifiers, plus the reference links they were collected from.
"""
#%% Useful links
# https://stackoverflow.com/questions/41844311/list-of-all-classification-algorithms
# https://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html
# https://github.com/davidsbatista/machine-learning-notebooks/blob/master/hyperparameter-across-models.ipynb
# https://scikit-learn.org/stable/modules/svm.html#classification
# https://machinelearningmastery.com/hyperparameters-for-classification-machine-learning-algorithms/  [params]
# https://uk.mathworks.com/help/stats/hyperparameter-optimization-in-classification-learner-app.html  [algo]

# As a general rule of thumb, it is required to run baseline models on the
# dataset. I know H2O AutoML and other AutoML packages do this, but I want to
# try using a scikit-learn Pipeline; see:
# https://codereview.stackexchange.com/questions/256934/model-pipeline-to-run-multiple-classifiers-for-ml-classification

# NOTE(review): none of the estimator classes used below (nor RBF) are imported
# in the visible part of this file. They look like the scikit-learn classes used
# in the plot_classifier_comparison example linked above -- TODO confirm and add
# the matching imports before executing this module.

# Human-readable labels, listed in the same order as `classifiers` below
# (entry i here is the label for classifiers[i]).
names = [
    "Nearest Neighbors",
    "Linear SVM",
    "RBF SVM",
    "Gaussian Process",
    "Decision Tree",
    "Random Forest",
    "Neural Net",
    "AdaBoost",
    "Naive Bayes",
    "QDA",
]

# Estimator instances with fixed hyperparameters, one per label in `names`.
# Keep both lists the same length and order when adding or removing a model.
classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    GaussianProcessClassifier(1.0 * RBF(1.0)),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1, max_iter=1000),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis(),
]