From bb8f6f70ba64f9583d45769d2560cc386034230b Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Mon, 7 Mar 2022 18:27:58 +0000 Subject: [PATCH] added prelim run for pnca all models with on-hot encoder multi model pipeline --- pnca_results_v1.py | 87 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 pnca_results_v1.py diff --git a/pnca_results_v1.py b/pnca_results_v1.py new file mode 100644 index 0000000..7d8b097 --- /dev/null +++ b/pnca_results_v1.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Mon Mar 7 15:20:42 2022 + +@author: tanu +""" +fit_time 0.008588 +score_time 0.004460 +test_acc 0.690148 +test_prec 0.690868 +test_rec 0.771250 +test_f1 0.725441 + +# RF +fit_time 0.368793 +score_time 0.110153 +test_acc 0.672537 +test_prec 0.664875 +test_rec 0.790417 +test_f1 0.720224 +dtype: float64 +#%% +numerical_features: ['ligand_distance', 'ligand_affinity_change' + , 'duet_stability_change', 'ddg_foldx', 'deepddg', 'ddg_dynamut2' + , 'asa', 'rsa', 'kd_values', 'rd_values' + , 'consurf_score', 'snap2_score', 'snap2_accuracy_pc'] + + Model F1_Score Precision Recall Accuracy ROC_AUC + 0 Logistic Regression 0.734177 0.690476 0.783784 0.700000 0.694922 + 1 Naive Bayes 0.467290 0.757576 0.337838 0.592857 0.608313 + 2 K-Nearest Neighbors 0.773006 0.707865 0.851351 0.735714 0.728706 + 3 SVM 0.766467 0.688172 0.864865 0.721429 0.712735 + 4 MLP 0.725000 0.674419 0.783784 0.685714 0.679771 + 5 Decision Tree 0.662069 0.676056 0.648649 0.650000 0.650082 + 6 Extra Trees 0.748387 0.716049 0.783784 0.721429 0.717649 + 7 Random Forest 0.722581 0.691358 0.756757 0.692857 0.688984 + 8 Random Forest2 0.731707 0.666667 0.810811 0.685714 0.678133 + 9 XGBoost 0.692810 0.670886 0.716216 0.664286 0.661138) + +all_features: numerical_features + ['ss_class', 'wt_prop_water', 'mut_prop_water', 'wt_prop_polarity', + 'mut_prop_polarity', 'wt_calcprop', 'mut_calcprop', 'active_aa_pos'] + + Model F1_Score Precision Recall Accuracy ROC_AUC + 0 Logistic Regression 0.757764 0.701149 0.824324 0.721429 0.715192 + 1 Naive Bayes 0.620690 0.633803 0.608108 0.607143 0.607084 + 2 K-Nearest Neighbors 0.619355 0.592593 0.648649 0.578571 0.574324 + 3 SVM 0.766467 0.688172 0.864865 0.721429 0.712735 + 4 MLP 0.738854 0.698795 0.783784 0.707143 0.702498 + 5 Decision Tree 0.666667 0.701493 0.635135 0.664286 0.666052 + 6 Extra Trees 0.728395 0.670455 0.797297 0.685714 0.678952 + 7 Random Forest 0.763636 0.692308 0.851351 0.721429 0.713554 + 8 Random Forest2 0.746988 0.673913 0.837838 0.700000 0.691646 + 9 XGBoost 0.710526 0.692308 0.729730 0.685714 0.683047) +#%% + Model F1_Score Precision Recall Accuracy ROC_AUC + 0Num Logistic Regression 0.734177 0.690476 0.783784 0.700000 0.694922 + 0All Logistic Regression 0.757764 0.701149 0.824324 0.721429 0.715192 + + 1Num Naive Bayes 0.467290 0.757576 0.337838 0.592857 0.608313 + 1All Naive Bayes 0.620690 0.633803 0.608108 0.607143 0.607084 + + 2Num K-Nearest Neighbors 0.773006 0.707865 0.851351 0.735714 0.728706 ** 'Num' is better than 'All' + 2All K-Nearest Neighbors 0.619355 0.592593 0.648649 0.578571 0.574324 + + 3Num SVM 0.766467 0.688172 0.864865 0.721429 0.712735 + 3All SVM 0.766467 0.688172 0.864865 0.721429 0.712735 + + 4Num MLP 0.725000 0.674419 0.783784 0.685714 0.679771 + 4All MLP 0.738854 0.698795 0.783784 0.707143 0.702498 + + 5Num Decision Tree 0.662069 0.676056 0.648649 0.650000 0.650082 ** marginal, equivalent + 5All Decision Tree 0.666667 0.701493 0.635135 0.664286 0.666052 + + 6Num Extra Trees 0.748387 0.716049 0.783784 0.721429 0.717649 ** marginal, equivalent + 6All Extra Trees 0.728395 0.670455 0.797297 0.685714 0.678952 + + 7Num Random Forest 0.722581 0.691358 0.756757 0.692857 0.688984 + 7All Random Forest 0.763636 0.692308 0.851351 0.721429 0.713554 + + 8Num Random Forest2 0.731707 0.666667 0.810811 0.685714 0.678133 + 8All Random Forest2 0.746988 0.673913 0.837838 0.700000 0.691646 + + 9Num XGBoost 0.692810 0.670886 0.716216 0.664286 0.661138) + 9All XGBoost 0.710526 0.692308 0.729730 0.685714 0.683047) + +