#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Mar  7 15:20:42 2022

@author: tanu
"""
# fit_time      0.008588
# score_time    0.004460
# test_acc      0.690148
# test_prec     0.690868
# test_rec      0.771250
# test_f1       0.725441

# RF
# fit_time      0.368793
# score_time    0.110153
# test_acc      0.672537
# test_prec     0.664875
# test_rec      0.790417
# test_f1       0.720224
# dtype: float64
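#%% A minimal sketch (not the original pipeline) of how per-model summaries with
# these metric names (fit_time, score_time, test_acc, test_prec, test_rec, test_f1)
# could be produced with sklearn's cross_validate(). X_train, y_train, the cv=10
# fold count and the model hyperparameters below are assumptions for illustration.
import pandas as pd
from sklearn.model_selection import cross_validate
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

scoring = {'acc': 'accuracy', 'prec': 'precision', 'rec': 'recall', 'f1': 'f1'}

for name, clf in [('LR', LogisticRegression(max_iter=1000)),
                  ('RF', RandomForestClassifier(n_estimators=100, random_state=42))]:
    cv_out = cross_validate(clf, X_train, y_train, cv=10, scoring=scoring)
    # Averaging over folds gives one float per metric, printed as a pandas Series
    print(f'# {name}')
    print(pd.DataFrame(cv_out).mean())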
#%% numerical_features:
# ['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
#  'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'asa', 'rsa', 'kd_values',
#  'rd_values', 'consurf_score', 'snap2_score', 'snap2_accuracy_pc']

#                  Model  F1_Score  Precision    Recall  Accuracy   ROC_AUC
# 0  Logistic Regression  0.734177   0.690476  0.783784  0.700000  0.694922
# 1          Naive Bayes  0.467290   0.757576  0.337838  0.592857  0.608313
# 2  K-Nearest Neighbors  0.773006   0.707865  0.851351  0.735714  0.728706
# 3                  SVM  0.766467   0.688172  0.864865  0.721429  0.712735
# 4                  MLP  0.725000   0.674419  0.783784  0.685714  0.679771
# 5        Decision Tree  0.662069   0.676056  0.648649  0.650000  0.650082
# 6          Extra Trees  0.748387   0.716049  0.783784  0.721429  0.717649
# 7        Random Forest  0.722581   0.691358  0.756757  0.692857  0.688984
# 8       Random Forest2  0.731707   0.666667  0.810811  0.685714  0.678133
# 9              XGBoost  0.692810   0.670886  0.716216  0.664286  0.661138

# all_features: numerical_features + ['ss_class', 'wt_prop_water', 'mut_prop_water',
#                                     'wt_prop_polarity', 'mut_prop_polarity',
#                                     'wt_calcprop', 'mut_calcprop', 'active_aa_pos']

#                  Model  F1_Score  Precision    Recall  Accuracy   ROC_AUC
# 0  Logistic Regression  0.757764   0.701149  0.824324  0.721429  0.715192
# 1          Naive Bayes  0.620690   0.633803  0.608108  0.607143  0.607084
# 2  K-Nearest Neighbors  0.619355   0.592593  0.648649  0.578571  0.574324
# 3                  SVM  0.766467   0.688172  0.864865  0.721429  0.712735
# 4                  MLP  0.738854   0.698795  0.783784  0.707143  0.702498
# 5        Decision Tree  0.666667   0.701493  0.635135  0.664286  0.666052
# 6          Extra Trees  0.728395   0.670455  0.797297  0.685714  0.678952
# 7        Random Forest  0.763636   0.692308  0.851351  0.721429  0.713554
# 8       Random Forest2  0.746988   0.673913  0.837838  0.700000  0.691646
# 9              XGBoost  0.710526   0.692308  0.729730  0.685714  0.683047

#%%
#                     Model  F1_Score  Precision    Recall  Accuracy   ROC_AUC
# 0Num  Logistic Regression  0.734177   0.690476  0.783784  0.700000  0.694922
# 0All  Logistic Regression  0.757764   0.701149  0.824324  0.721429  0.715192
# 1Num          Naive Bayes  0.467290   0.757576  0.337838  0.592857  0.608313
# 1All          Naive Bayes  0.620690   0.633803  0.608108  0.607143  0.607084
# 2Num  K-Nearest Neighbors  0.773006   0.707865  0.851351  0.735714  0.728706  ** 'Num' is better than 'All'
# 2All  K-Nearest Neighbors  0.619355   0.592593  0.648649  0.578571  0.574324
# 3Num                  SVM  0.766467   0.688172  0.864865  0.721429  0.712735
# 3All                  SVM  0.766467   0.688172  0.864865  0.721429  0.712735
# 4Num                  MLP  0.725000   0.674419  0.783784  0.685714  0.679771
# 4All                  MLP  0.738854   0.698795  0.783784  0.707143  0.702498
# 5Num        Decision Tree  0.662069   0.676056  0.648649  0.650000  0.650082  ** marginal, equivalent
# 5All        Decision Tree  0.666667   0.701493  0.635135  0.664286  0.666052
# 6Num          Extra Trees  0.748387   0.716049  0.783784  0.721429  0.717649  ** marginal, equivalent
# 6All          Extra Trees  0.728395   0.670455  0.797297  0.685714  0.678952
# 7Num        Random Forest  0.722581   0.691358  0.756757  0.692857  0.688984
# 7All        Random Forest  0.763636   0.692308  0.851351  0.721429  0.713554
# 8Num       Random Forest2  0.731707   0.666667  0.810811  0.685714  0.678133
# 8All       Random Forest2  0.746988   0.673913  0.837838  0.700000  0.691646
# 9Num              XGBoost  0.692810   0.670886  0.716216  0.664286  0.661138
# 9All              XGBoost  0.710526   0.692308  0.729730  0.685714  0.683047

#%%
#                  Model  F1_Score  Precision    Recall  Accuracy   ROC_AUC
# 0  Logistic Regression  0.757764   0.701149  0.824324  0.721429  0.715192
# 1          Naive Bayes  0.628571   0.666667  0.594595  0.628571  0.630631
# 2  K-Nearest Neighbors  0.666667   0.623529  0.716216  0.621429  0.615684
# 3                  SVM  0.766467   0.688172  0.864865  0.721429  0.712735
# 4                  MLP  0.726115   0.686747  0.770270  0.692857  0.688165
# 5        Decision Tree  0.647482   0.692308  0.608108  0.650000  0.652539
# 6          Extra Trees  0.760736   0.696629  0.837838  0.721429  0.714373
# 7        Random Forest  0.736196   0.674157  0.810811  0.692857  0.685708
# 8       Random Forest2  0.736196   0.674157  0.810811  0.692857  0.685708
# 9              XGBoost  0.710526   0.692308  0.729730  0.685714  0.683047
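#%% A minimal sketch (not the original pipeline) of how the Model/F1_Score/
# Precision/Recall/Accuracy/ROC_AUC tables above could be assembled: fit each
# classifier on a training split, score it on a held-out split, and collect the
# metrics into a DataFrame. X_train, X_test, y_train, y_test and the chosen
# hyperparameters are assumptions here, and only a subset of the models is listed.
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
from sklearn.metrics import (accuracy_score, f1_score, precision_score,
                             recall_score, roc_auc_score)

models = {'Logistic Regression': LogisticRegression(max_iter=1000),
          'Naive Bayes'        : GaussianNB(),
          'K-Nearest Neighbors': KNeighborsClassifier(),
          'SVM'                : SVC(),
          'Extra Trees'        : ExtraTreesClassifier(random_state=42),
          'Random Forest'      : RandomForestClassifier(random_state=42)}

rows = []
for name, clf in models.items():
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    rows.append({'Model'    : name,
                 'F1_Score' : f1_score(y_test, y_pred),
                 'Precision': precision_score(y_test, y_pred),
                 'Recall'   : recall_score(y_test, y_pred),
                 'Accuracy' : accuracy_score(y_test, y_pred),
                 'ROC_AUC'  : roc_auc_score(y_test, y_pred)})

print(pd.DataFrame(rows))
# Running this once with numerical_features and once with all_features, then
# stacking the two result frames, would give a 'Num' vs 'All' comparison like
# the combined table above.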