#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Fri May 20 00:36:17 2022 @author: tanu """ # pnca [ numerical ONLY + NO oversampling] # LR: hyperparam {'clf__estimator': LogisticRegression(penalty='l1', random_state=42, solver='saga'), 'clf__estimator__C': 1.0, 'clf__estimator__max_iter': 100, 'clf__estimator__penalty': 'l1', 'clf__estimator__solver': 'saga'} Logistic_Regression bts_fscore 0.70 bts_mcc 0.29 bts_precision 0.57 bts_recall 0.92 bts_accuracy 0.61 bts_roc_auc 0.61 bts_jaccard 0.54 # LR: FS + hyperparam {'bts_fscore': 0.71, 'bts_mcc': 0.34, 'bts_precision': 0.61, 'bts_recall': 0.87, 'bts_accuracy': 0.65, 'bts_roc_auc': 0.65, 'bts_jaccard': 0.55} ####################################################################### # RF: hyperparam [~45 min] Best model: {'clf__estimator': RandomForestClassifier(class_weight='balanced', max_depth=4, max_features=None, min_samples_leaf=2, min_samples_split=15, n_estimators=10, n_jobs=10, oob_score=True, random_state=42), 'clf__estimator__class_weight': 'balanced', 'clf__estimator__criterion': 'gini', 'clf__estimator__max_depth': 4, 'clf__estimator__max_features': None, 'clf__estimator__min_samples_leaf': 2, 'clf__estimator__min_samples_split': 15, 'clf__estimator__n_estimators': 10} Best models score: 0.3329374281771619 : 0.33 RF bts_fscore 0.69 bts_mcc 0.37 bts_precision 0.67 bts_recall 0.72 bts_accuracy 0.68 bts_roc_auc 0.68 bts_jaccard 0.53 ####################################################################### # ABC: hyperparam {'clf__estimator': AdaBoostClassifier(n_estimators=2, random_state=42), 'clf__estimator__n_estimators': 2} ABC 1 [(clf__estimator, AdaBoostClassifier(n_estimat... 
bts_fscore 0.71 bts_mcc 0.36 bts_precision 0.63 bts_recall 0.83 bts_accuracy 0.67 bts_roc_auc 0.67 bts_jaccard 0.56 ####################################################################### # BC: hyperparam {'clf__estimator': BaggingClassifier(n_estimators=200, n_jobs=10, oob_score=True, random_state=42), 'clf__estimator__n_estimators': 200} BC 0 best_model_params 1 [(clf__estimator, BaggingClassifier(n_estimato... bts_fscore 0.72 bts_mcc 0.37 bts_precision 0.64 bts_recall 0.82 bts_accuracy 0.68 bts_roc_auc 0.68 bts_jaccard 0.56 ####################################################################### # BNB: hyperparam {'clf__estimator': BernoulliNB(alpha=1, binarize=None), 'clf__estimator__alpha': 1, 'clf__estimator__binarize': None, 'clf__estimator__class_prior': None, 'clf__estimator__fit_prior': True} BNB 1 [(clf__estimator, BernoulliNB(alpha=1, binariz... bts_fscore 0.72 bts_mcc 0.35 bts_precision 0.6 bts_recall 0.92 bts_accuracy 0.65 bts_roc_auc 0.65 bts_jaccard 0.56 ####################################################################### # DT: hyperparam {'clf__estimator': DecisionTreeClassifier(class_weight='balanced', criterion='entropy', max_depth=2, random_state=42), 'clf__estimator__class_weight': 'balanced', 'clf__estimator__criterion': 'entropy', 'clf__estimator__max_depth': 2, 'clf__estimator__max_features': None, 'clf__estimator__min_samples_leaf': 1, 'clf__estimator__min_samples_split': 2} DT 1 [(clf__estimator, DecisionTreeClassifier(class... bts_fscore 0.72 bts_mcc 0.42 bts_precision 0.69 bts_recall 0.76 bts_accuracy 0.71 bts_roc_auc 0.71 bts_jaccard 0.57 ####################################################################### # GBC: hyperparam {'clf__estimator': GradientBoostingClassifier(learning_rate=0.01, max_depth=7, random_state=42, subsample=0.5), 'clf__estimator__learning_rate': 0.01, 'clf__estimator__max_depth': 7, 'clf__estimator__n_estimators': 100, 'clf__estimator__subsample': 0.5} GBC 1 [(clf__estimator, GradientBoostingClassifier(l... 
bts_fscore 0.71 bts_mcc 0.33 bts_precision 0.6 bts_recall 0.88 bts_accuracy 0.64 bts_roc_auc 0.65 bts_jaccard 0.55 ####################################################################### # GNB: hyperparam {'clf__estimator': GaussianNB(var_smoothing=0.006579332246575682), 'clf__estimator__priors': None, 'clf__estimator__var_smoothing': 0.006579332246575682} GNB 1 [(clf__estimator, GaussianNB(var_smoothing=0.0... bts_fscore 0.72 bts_mcc 0.46 bts_precision 0.73 bts_recall 0.71 bts_accuracy 0.73 bts_roc_auc 0.73 bts_jaccard 0.57 ####################################################################### # GPC: hyperparam {'clf__estimator': GaussianProcessClassifier(kernel=1**2 * Matern(length_scale=1, nu=1.5), random_state=42), 'clf__estimator__kernel': 1**2 * Matern(length_scale=1, nu=1.5)} ConvergenceWarning: The optimal value found for dimension 0 of parameter k2__alpha is close to the specified upper bound 100000.0. Increasing the bound and calling fit again may find a better value. warnings.warn( GPC 1 [(clf__estimator, GaussianProcessClassifier(ke... bts_fscore 0.73 bts_mcc 0.38 bts_precision 0.6 bts_recall 0.92 bts_accuracy 0.66 bts_roc_auc 0.66 bts_jaccard 0.58 ####################################################################### # KNN: hyperparam Best model: {'clf__estimator': KNeighborsClassifier(metric='euclidean', n_jobs=10, n_neighbors=11, weights='distance'), 'clf__estimator__metric': 'euclidean', 'clf__estimator__n_neighbors': 11, 'clf__estimator__weights': 'distance'} 1 [(clf__estimator, KNeighborsClassifier(metric=... bts_fscore 0.69 bts_mcc 0.26 bts_precision 0.58 bts_recall 0.85 bts_accuracy 0.62 bts_roc_auc 0.62 bts_jaccard 0.52 Best model: {'clf__estimator': KNeighborsClassifier(metric='euclidean', n_jobs=10, n_neighbors=29), 'clf__estimator__metric': 'euclidean', 'clf__estimator__n_neighbors': 29, 'clf__estimator__weights': 'uniform'} KNN 1 [(clf__estimator, KNeighborsClassifier(metric=... 
bts_fscore 0.73 bts_mcc 0.37 bts_precision 0.6 bts_recall 0.92 bts_accuracy 0.65 bts_roc_auc 0.65 bts_jaccard 0.57 ####################################################################### # MLP: hyperparam # learning_rate: other settings were tried as well, but the search always comes back with 'constant' {'clf__estimator': MLPClassifier(hidden_layer_sizes=3, max_iter=500, random_state=42, solver='lbfgs'), 'clf__estimator__hidden_layer_sizes': 3, 'clf__estimator__learning_rate': 'constant', 'clf__estimator__solver': 'lbfgs'} 1 [(clf__estimator, MLPClassifier(hidden_layer_s... bts_fscore 0.71 bts_mcc 0.34 bts_precision 0.61 bts_recall 0.86 bts_accuracy 0.65 bts_roc_auc 0.65 bts_jaccard 0.55 ####################################################################### # QDA: hyperparam Best model: {'clf__estimator': QuadraticDiscriminantAnalysis()} QDA 1 [(clf__estimator, QuadraticDiscriminantAnalysi... bts_fscore 0.66 bts_mcc 0.33 bts_precision 0.67 bts_recall 0.65 bts_accuracy 0.67 bts_roc_auc 0.67 bts_jaccard 0.49 ####################################################################### # RC: hyperparam Best model: {'clf__estimator': RidgeClassifier(alpha=0.8, random_state=42) , 'clf__estimator__alpha': 0.8} Ridge Classifier 1 [(clf__estimator, RidgeClassifier(alpha=0.8, r... bts_fscore 0.71 bts_mcc 0.31 bts_precision 0.59 bts_recall 0.88 bts_accuracy 0.64 bts_roc_auc 0.64 bts_jaccard 0.55 ####################################################################### # SVC: hyperparam Best model: {'clf__estimator': SVC(C=10, kernel='linear', random_state=42), 'clf__estimator__C': 10, 'clf__estimator__gamma': 'scale', 'clf__estimator__kernel': 'linear'} SVC 1 [(clf__estimator, SVC(C=10, kernel='linear', r... 
bts_fscore 0.71 bts_mcc 0.31 bts_precision 0.57 bts_recall 0.93 bts_accuracy 0.62 bts_roc_auc 0.62 bts_jaccard 0.55 Best model: {'clf__estimator': SVC(C=10, gamma='auto', random_state=42), 'clf__estimator__C': 10, 'clf__estimator__gamma': 'auto', 'clf__estimator__kernel': 'rbf'} Best models score: SVC 1 [(clf__estimator, SVC(C=10, gamma='auto', rand... bts_fscore 0.71 bts_mcc 0.32 bts_precision 0.58 bts_recall 0.93 bts_accuracy 0.63 bts_roc_auc 0.63 bts_jaccard 0.56 Best model: {'clf__estimator': SVC(C=50, gamma='auto', kernel='sigmoid', random_state=42), 'clf__estimator__C': 50, 'clf__estimator__gamma': 'auto', 'clf__estimator__kernel': 'sigmoid'} SVC 1 [(clf__estimator, SVC(C=50, gamma='auto', kern... bts_fscore 0.72 bts_mcc 0.33 bts_precision 0.58 bts_recall 0.93 bts_accuracy 0.63 bts_roc_auc 0.63 bts_jaccard 0.56 ####################################################################### # XGB: hyperparam Best model: {'clf__estimator': XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, colsample_bynode=None, colsample_bytree=None, enable_categorical=False, gamma=None, gpu_id=None, importance_type=None, interaction_constraints=None, learning_rate=0.01, max_delta_step=None, max_depth=6, max_features='auto', min_child_weight=None, min_samples_leaf=4, missing=nan, monotone_constraints=None, n_estimators=100, n_jobs=10, num_parallel_tree=None, predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, scale_pos_weight=None, subsample=None, tree_method=None, validate_parameters=None, verbosity=None), 'clf__estimator__learning_rate': 0.01, 'clf__estimator__max_depth': 6, 'clf__estimator__max_features': 'auto', 'clf__estimator__min_samples_leaf': 4} XGBoost 0 best_model_params 1 [(clf__estimator, XGBClassifier(base_score=Non... bts_fscore 0.68 bts_mcc 0.31 bts_precision 0.63 bts_recall 0.73 bts_accuracy 0.65 bts_roc_auc 0.65 bts_jaccard 0.51