#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Mar  7 15:20:42 2022

@author: tanu
"""
# fit_time      0.008588
# score_time    0.004460
# test_acc      0.690148
# test_prec     0.690868
# test_rec      0.771250
# test_f1       0.725441

# RF
# fit_time      0.368793
# score_time    0.110153
# test_acc      0.672537
# test_prec     0.664875
# test_rec      0.790417
# test_f1       0.720224
# dtype: float64
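#%% A minimal sketch (not the original pipeline) of how per-model summaries with
# these metric names (fit_time, score_time, test_acc, test_prec, test_rec, test_f1)
# could be produced with sklearn's cross_validate(). X_train, y_train, the cv=10
# fold count and the model hyperparameters below are assumptions for illustration.
import pandas as pd
from sklearn.model_selection import cross_validate
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

scoring = {'acc': 'accuracy', 'prec': 'precision', 'rec': 'recall', 'f1': 'f1'}

for name, clf in [('LR', LogisticRegression(max_iter=1000)),
                  ('RF', RandomForestClassifier(n_estimators=100, random_state=42))]:
    cv_out = cross_validate(clf, X_train, y_train, cv=10, scoring=scoring)
    # Averaging over folds gives one float per metric, printed as a pandas Series
    print(f'# {name}')
    print(pd.DataFrame(cv_out).mean())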
#%% numerical_features:
# ['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
#  'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'asa', 'rsa', 'kd_values',
#  'rd_values', 'consurf_score', 'snap2_score', 'snap2_accuracy_pc']

#                  Model  F1_Score  Precision    Recall  Accuracy   ROC_AUC
# 0  Logistic Regression  0.734177   0.690476  0.783784  0.700000  0.694922
# 1          Naive Bayes  0.467290   0.757576  0.337838  0.592857  0.608313
# 2  K-Nearest Neighbors  0.773006   0.707865  0.851351  0.735714  0.728706
# 3                  SVM  0.766467   0.688172  0.864865  0.721429  0.712735
# 4                  MLP  0.725000   0.674419  0.783784  0.685714  0.679771
# 5        Decision Tree  0.662069   0.676056  0.648649  0.650000  0.650082
# 6          Extra Trees  0.748387   0.716049  0.783784  0.721429  0.717649
# 7        Random Forest  0.722581   0.691358  0.756757  0.692857  0.688984
# 8       Random Forest2  0.731707   0.666667  0.810811  0.685714  0.678133
# 9              XGBoost  0.692810   0.670886  0.716216  0.664286  0.661138

# all_features: numerical_features + ['ss_class', 'wt_prop_water', 'mut_prop_water',
#                                     'wt_prop_polarity', 'mut_prop_polarity',
#                                     'wt_calcprop', 'mut_calcprop', 'active_aa_pos']

#                  Model  F1_Score  Precision    Recall  Accuracy   ROC_AUC
# 0  Logistic Regression  0.757764   0.701149  0.824324  0.721429  0.715192
# 1          Naive Bayes  0.620690   0.633803  0.608108  0.607143  0.607084
# 2  K-Nearest Neighbors  0.619355   0.592593  0.648649  0.578571  0.574324
# 3                  SVM  0.766467   0.688172  0.864865  0.721429  0.712735
# 4                  MLP  0.738854   0.698795  0.783784  0.707143  0.702498
# 5        Decision Tree  0.666667   0.701493  0.635135  0.664286  0.666052
# 6          Extra Trees  0.728395   0.670455  0.797297  0.685714  0.678952
# 7        Random Forest  0.763636   0.692308  0.851351  0.721429  0.713554
# 8       Random Forest2  0.746988   0.673913  0.837838  0.700000  0.691646
# 9              XGBoost  0.710526   0.692308  0.729730  0.685714  0.683047

#%%
#                     Model  F1_Score  Precision    Recall  Accuracy   ROC_AUC
# 0Num  Logistic Regression  0.734177   0.690476  0.783784  0.700000  0.694922
# 0All  Logistic Regression  0.757764   0.701149  0.824324  0.721429  0.715192
# 1Num          Naive Bayes  0.467290   0.757576  0.337838  0.592857  0.608313
# 1All          Naive Bayes  0.620690   0.633803  0.608108  0.607143  0.607084
# 2Num  K-Nearest Neighbors  0.773006   0.707865  0.851351  0.735714  0.728706  ** 'Num' is better than 'All'
# 2All  K-Nearest Neighbors  0.619355   0.592593  0.648649  0.578571  0.574324
# 3Num                  SVM  0.766467   0.688172  0.864865  0.721429  0.712735
# 3All                  SVM  0.766467   0.688172  0.864865  0.721429  0.712735
# 4Num                  MLP  0.725000   0.674419  0.783784  0.685714  0.679771
# 4All                  MLP  0.738854   0.698795  0.783784  0.707143  0.702498
# 5Num        Decision Tree  0.662069   0.676056  0.648649  0.650000  0.650082  ** marginal, equivalent
# 5All        Decision Tree  0.666667   0.701493  0.635135  0.664286  0.666052
# 6Num          Extra Trees  0.748387   0.716049  0.783784  0.721429  0.717649  ** marginal, equivalent
# 6All          Extra Trees  0.728395   0.670455  0.797297  0.685714  0.678952
# 7Num        Random Forest  0.722581   0.691358  0.756757  0.692857  0.688984
# 7All        Random Forest  0.763636   0.692308  0.851351  0.721429  0.713554
# 8Num       Random Forest2  0.731707   0.666667  0.810811  0.685714  0.678133
# 8All       Random Forest2  0.746988   0.673913  0.837838  0.700000  0.691646
# 9Num              XGBoost  0.692810   0.670886  0.716216  0.664286  0.661138
# 9All              XGBoost  0.710526   0.692308  0.729730  0.685714  0.683047

#%%
#                  Model  F1_Score  Precision    Recall  Accuracy   ROC_AUC
# 0  Logistic Regression  0.757764   0.701149  0.824324  0.721429  0.715192
# 1          Naive Bayes  0.628571   0.666667  0.594595  0.628571  0.630631
# 2  K-Nearest Neighbors  0.666667   0.623529  0.716216  0.621429  0.615684
# 3                  SVM  0.766467   0.688172  0.864865  0.721429  0.712735
# 4                  MLP  0.726115   0.686747  0.770270  0.692857  0.688165
# 5        Decision Tree  0.647482   0.692308  0.608108  0.650000  0.652539
# 6          Extra Trees  0.760736   0.696629  0.837838  0.721429  0.714373
# 7        Random Forest  0.736196   0.674157  0.810811  0.692857  0.685708
# 8       Random Forest2  0.736196   0.674157  0.810811  0.692857  0.685708
# 9              XGBoost  0.710526   0.692308  0.729730  0.685714  0.683047
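#%% A minimal sketch (not the original pipeline) of how the Model/F1_Score/
# Precision/Recall/Accuracy/ROC_AUC tables above could be assembled: fit each
# classifier on a training split, score it on a held-out split, and collect the
# metrics into a DataFrame. X_train, X_test, y_train, y_test and the chosen
# hyperparameters are assumptions here, and only a subset of the models is listed.
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
from sklearn.metrics import (accuracy_score, f1_score, precision_score,
                             recall_score, roc_auc_score)

models = {'Logistic Regression': LogisticRegression(max_iter=1000),
          'Naive Bayes'        : GaussianNB(),
          'K-Nearest Neighbors': KNeighborsClassifier(),
          'SVM'                : SVC(),
          'Extra Trees'        : ExtraTreesClassifier(random_state=42),
          'Random Forest'      : RandomForestClassifier(random_state=42)}

rows = []
for name, clf in models.items():
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    rows.append({'Model'    : name,
                 'F1_Score' : f1_score(y_test, y_pred),
                 'Precision': precision_score(y_test, y_pred),
                 'Recall'   : recall_score(y_test, y_pred),
                 'Accuracy' : accuracy_score(y_test, y_pred),
                 'ROC_AUC'  : roc_auc_score(y_test, y_pred)})

print(pd.DataFrame(rows))
# Running this once with numerical_features and once with all_features, then
# stacking the two result frames, would give a 'Num' vs 'All' comparison like
# the combined table above.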