diff --git a/scripts/ml/MultModelsCl.py b/scripts/ml/MultModelsCl.py
index 078d60a..dfcd87c 100755
--- a/scripts/ml/MultModelsCl.py
+++ b/scripts/ml/MultModelsCl.py
@@ -147,7 +147,7 @@ def MultModelsCl(input_df, target, skf_cv
     mlp     = MLPClassifier(max_iter = 500, **rs)
     dt      = DecisionTreeClassifier(**rs)
     ets     = ExtraTreesClassifier(**rs)
-    
+    et      = ExtraTreeClassifier(**rs)  
     rf      = RandomForestClassifier(**rs, n_estimators = 1000 )
     rf2     = RandomForestClassifier(
                           min_samples_leaf = 5
@@ -169,7 +169,6 @@ def MultModelsCl(input_df, target, skf_cv
 
     abc = AdaBoostClassifier(**rs)
     bc  = BaggingClassifier(**rs, **njobs, bootstrap = True, oob_score = True)
-    et  = ExtraTreeClassifier(**rs)
     gpc = GaussianProcessClassifier(**rs)
     gbc = GradientBoostingClassifier(**rs)
     qda = QuadraticDiscriminantAnalysis()
@@ -181,14 +180,13 @@ def MultModelsCl(input_df, target, skf_cv
             , ('Gaussian NB'               , gnb)
             , ('Naive Bayes'               , nb)
             , ('K-Nearest Neighbors'       , knn) 
-            , ('SVM'                       , svc) 
+            , ('SVC'                       , svc) 
             , ('MLP'                       , mlp) 
             , ('Decision Tree'             , dt) 
             , ('Extra Trees'               , ets) 
             , ('Extra Tree'                , et)
             , ('Random Forest'             , rf) 
             , ('Random Forest2'            , rf2) 
-            , ('Naive Bayes'               , nb)
             , ('XGBoost'                   , xgb)
             , ('LDA'                       , lda)
             , ('Multinomial'               , mnb)
diff --git a/scripts/ml/alr_config.py b/scripts/ml/alr_config.py
index 55a82eb..996748f 100755
--- a/scripts/ml/alr_config.py
+++ b/scripts/ml/alr_config.py
@@ -32,15 +32,36 @@ from ml_data import *
 # TT run all ML clfs: baseline mode
 from MultModelsCl import MultModelsCl
 
-#%%###########################################################################
-
-print('\n#####################################################################\n')
-
-print('TESTING cmd:'
+############################################################################
+print('\n#####################################################################\n'
+      , '\nRunning ML analysis: UQ [without AA  index but with active site annotations]'
       , '\nGene name:', gene
-      , '\nDrug name:', drug
-      , '\nTotal input features:', X.shape
-      , '\n', Counter(y))
+      , '\nDrug name:', drug)
+
+#==================
+# Specify outdir 
+#==================
+
+outdir_ml = outdir + 'ml/uq_v1/'
+
+print('\nOutput directory:', outdir_ml)
+
+#%%###########################################################################
+print('\nSanity checks:'  # pre-run summary: feature count, train/test shapes, target class counts
+      , '\nTotal input features:', len(X.columns)
+      , '\n'
+      , '\nTraining data size:', X.shape
+      , '\nTest data size:', X_bts.shape
+      , '\n'
+      , '\nTarget feature numbers (training data):', Counter(y)
+      , '\nTarget features ratio (training data):', yc1_ratio  # yc1_ratio is defined outside this hunk
+      , '\n'
+      , '\nTarget feature numbers (test data):', Counter(y_bts)
+      , '\nTarget features ratio (test data):', yc2_ratio  # yc2_ratio is defined outside this hunk
+      
+      , '\n\n#####################################################################\n')
+
+print('\n================================================================\n')
 
 print('Strucutral features (n):'
       , len(X_ssFN)
@@ -50,11 +71,11 @@ print('Strucutral features (n):'
       , '\nOther struc columns:', X_str
       , '\n================================================================\n')
 
-print('AAindex features (n):'
-      , len(X_aaindexFN)
-      , '\nThese are:\n'
-      , X_aaindexFN
-      , '\n================================================================\n')
+# print('AAindex features (n):'
+#       , len(X_aaindexFN)
+#       , '\nThese are:\n'
+#       , X_aaindexFN
+#       , '\n================================================================\n')
 
 print('Evolutionary features (n):'
       , len(X_evolFN)
@@ -75,20 +96,15 @@ print('Categorical features (n):'
       , categorical_FN
       , '\n================================================================\n')
 
-if ( len(X.columns) ==  len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
+#if ( len(X.columns) ==  len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
+if ( len(X.columns) ==  len(X_ssFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
     print('\nPass: No. of features match')
 else:
     sys.exit('\nFail: Count of feature mismatch')
 
 print('\n#####################################################################\n')
-################################################################################
-#==================
-# Specify outdir 
-#==================
 
-outdir_ml = outdir + 'ml/v2/'
-
-################################################################################
+###############################################################################
 #==================
 # Baseline models 
 #==================
diff --git a/scripts/ml/embb_config.py b/scripts/ml/embb_config.py
index be4d609..381f95f 100755
--- a/scripts/ml/embb_config.py
+++ b/scripts/ml/embb_config.py
@@ -32,15 +32,36 @@ from ml_data import *
 # TT run all ML clfs: baseline mode
 from MultModelsCl import MultModelsCl
 
-#%%###########################################################################
-
-print('\n#####################################################################\n')
-
-print('TESTING cmd:'
+############################################################################
+print('\n#####################################################################\n'
+      , '\nRunning ML analysis: UQ [without AA  index but with active site annotations]'
       , '\nGene name:', gene
-      , '\nDrug name:', drug
-      , '\nTotal input features:', X.shape
-      , '\n', Counter(y))
+      , '\nDrug name:', drug)
+
+#==================
+# Specify outdir 
+#==================
+
+outdir_ml = outdir + 'ml/uq_v1/'
+
+print('\nOutput directory:', outdir_ml)
+
+#%%###########################################################################
+print('\nSanity checks:'  # pre-run summary: feature count, train/test shapes, target class counts
+      , '\nTotal input features:', len(X.columns)
+      , '\n'
+      , '\nTraining data size:', X.shape
+      , '\nTest data size:', X_bts.shape
+      , '\n'
+      , '\nTarget feature numbers (training data):', Counter(y)
+      , '\nTarget features ratio (training data):', yc1_ratio  # yc1_ratio is defined outside this hunk
+      , '\n'
+      , '\nTarget feature numbers (test data):', Counter(y_bts)
+      , '\nTarget features ratio (test data):', yc2_ratio  # yc2_ratio is defined outside this hunk
+      
+      , '\n\n#####################################################################\n')
+
+print('\n================================================================\n')
 
 print('Strucutral features (n):'
       , len(X_ssFN)
@@ -50,11 +71,11 @@ print('Strucutral features (n):'
       , '\nOther struc columns:', X_str
       , '\n================================================================\n')
 
-print('AAindex features (n):'
-      , len(X_aaindexFN)
-      , '\nThese are:\n'
-      , X_aaindexFN
-      , '\n================================================================\n')
+# print('AAindex features (n):'
+#       , len(X_aaindexFN)
+#       , '\nThese are:\n'
+#       , X_aaindexFN
+#       , '\n================================================================\n')
 
 print('Evolutionary features (n):'
       , len(X_evolFN)
@@ -75,20 +96,15 @@ print('Categorical features (n):'
       , categorical_FN
       , '\n================================================================\n')
 
-if ( len(X.columns) ==  len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
+#if ( len(X.columns) ==  len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
+if ( len(X.columns) ==  len(X_ssFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
     print('\nPass: No. of features match')
 else:
     sys.exit('\nFail: Count of feature mismatch')
 
 print('\n#####################################################################\n')
-################################################################################
-#==================
-# Specify outdir 
-#==================
 
-outdir_ml = outdir + 'ml/v2/'
-
-################################################################################
+###############################################################################
 #==================
 # Baseline models 
 #==================
diff --git a/scripts/ml/gid_config.py b/scripts/ml/gid_config.py
index 73594af..8541086 100755
--- a/scripts/ml/gid_config.py
+++ b/scripts/ml/gid_config.py
@@ -32,15 +32,36 @@ from ml_data import *
 # TT run all ML clfs: baseline mode
 from MultModelsCl import MultModelsCl
 
-#%%###########################################################################
-
-print('\n#####################################################################\n')
-
-print('TESTING cmd:'
+############################################################################
+print('\n#####################################################################\n'
+      , '\nRunning ML analysis: UQ [without AA  index but with active site annotations]'
       , '\nGene name:', gene
-      , '\nDrug name:', drug
-      , '\nTotal input features:', X.shape
-      , '\n', Counter(y))
+      , '\nDrug name:', drug)
+
+#==================
+# Specify outdir 
+#==================
+
+outdir_ml = outdir + 'ml/uq_v1/'
+
+print('\nOutput directory:', outdir_ml)
+
+#%%###########################################################################
+print('\nSanity checks:'  # pre-run summary: feature count, train/test shapes, target class counts
+      , '\nTotal input features:', len(X.columns)
+      , '\n'
+      , '\nTraining data size:', X.shape
+      , '\nTest data size:', X_bts.shape
+      , '\n'
+      , '\nTarget feature numbers (training data):', Counter(y)
+      , '\nTarget features ratio (training data):', yc1_ratio  # yc1_ratio is defined outside this hunk
+      , '\n'
+      , '\nTarget feature numbers (test data):', Counter(y_bts)
+      , '\nTarget features ratio (test data):', yc2_ratio  # yc2_ratio is defined outside this hunk
+      
+      , '\n\n#####################################################################\n')
+
+print('\n================================================================\n')
 
 print('Strucutral features (n):'
       , len(X_ssFN)
@@ -50,11 +71,11 @@ print('Strucutral features (n):'
       , '\nOther struc columns:', X_str
       , '\n================================================================\n')
 
-print('AAindex features (n):'
-      , len(X_aaindexFN)
-      , '\nThese are:\n'
-      , X_aaindexFN
-      , '\n================================================================\n')
+# print('AAindex features (n):'
+#       , len(X_aaindexFN)
+#       , '\nThese are:\n'
+#       , X_aaindexFN
+#       , '\n================================================================\n')
 
 print('Evolutionary features (n):'
       , len(X_evolFN)
@@ -75,20 +96,15 @@ print('Categorical features (n):'
       , categorical_FN
       , '\n================================================================\n')
 
-if ( len(X.columns) ==  len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
+#if ( len(X.columns) ==  len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
+if ( len(X.columns) ==  len(X_ssFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
     print('\nPass: No. of features match')
 else:
     sys.exit('\nFail: Count of feature mismatch')
 
 print('\n#####################################################################\n')
-################################################################################
-#==================
-# Specify outdir 
-#==================
 
-outdir_ml = outdir + 'ml/v2/'
-
-################################################################################
+###############################################################################
 #==================
 # Baseline models 
 #==================
diff --git a/scripts/ml/katg_config.py b/scripts/ml/katg_config.py
index 59cf24a..1258f39 100755
--- a/scripts/ml/katg_config.py
+++ b/scripts/ml/katg_config.py
@@ -32,15 +32,36 @@ from ml_data import *
 # TT run all ML clfs: baseline mode
 from MultModelsCl import MultModelsCl
 
-#%%###########################################################################
-
-print('\n#####################################################################\n')
-
-print('TESTING cmd:'
+############################################################################
+print('\n#####################################################################\n'
+      , '\nRunning ML analysis: UQ [without AA  index but with active site annotations]'
       , '\nGene name:', gene
-      , '\nDrug name:', drug
-      , '\nTotal input features:', X.shape
-      , '\n', Counter(y))
+      , '\nDrug name:', drug)
+
+#==================
+# Specify outdir 
+#==================
+
+outdir_ml = outdir + 'ml/uq_v1/'
+
+print('\nOutput directory:', outdir_ml)
+
+#%%###########################################################################
+print('\nSanity checks:'  # pre-run summary: feature count, train/test shapes, target class counts
+      , '\nTotal input features:', len(X.columns)
+      , '\n'
+      , '\nTraining data size:', X.shape
+      , '\nTest data size:', X_bts.shape
+      , '\n'
+      , '\nTarget feature numbers (training data):', Counter(y)
+      , '\nTarget features ratio (training data):', yc1_ratio  # yc1_ratio is defined outside this hunk
+      , '\n'
+      , '\nTarget feature numbers (test data):', Counter(y_bts)
+      , '\nTarget features ratio (test data):', yc2_ratio  # yc2_ratio is defined outside this hunk
+      
+      , '\n\n#####################################################################\n')
+
+print('\n================================================================\n')
 
 print('Strucutral features (n):'
       , len(X_ssFN)
@@ -50,11 +71,11 @@ print('Strucutral features (n):'
       , '\nOther struc columns:', X_str
       , '\n================================================================\n')
 
-print('AAindex features (n):'
-      , len(X_aaindexFN)
-      , '\nThese are:\n'
-      , X_aaindexFN
-      , '\n================================================================\n')
+# print('AAindex features (n):'
+#       , len(X_aaindexFN)
+#       , '\nThese are:\n'
+#       , X_aaindexFN
+#       , '\n================================================================\n')
 
 print('Evolutionary features (n):'
       , len(X_evolFN)
@@ -75,20 +96,15 @@ print('Categorical features (n):'
       , categorical_FN
       , '\n================================================================\n')
 
-if ( len(X.columns) ==  len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
+#if ( len(X.columns) ==  len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
+if ( len(X.columns) ==  len(X_ssFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
     print('\nPass: No. of features match')
 else:
     sys.exit('\nFail: Count of feature mismatch')
 
 print('\n#####################################################################\n')
-################################################################################
-#==================
-# Specify outdir 
-#==================
 
-outdir_ml = outdir + 'ml/v2/'
-
-################################################################################
+###############################################################################
 #==================
 # Baseline models 
 #==================
diff --git a/scripts/ml/log_gid_7030.txt b/scripts/ml/log_gid_7030.txt
index d0eca99..d1a73f6 100644
--- a/scripts/ml/log_gid_7030.txt
+++ b/scripts/ml/log_gid_7030.txt
@@ -1,58 +1,10 @@
-/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_7030.py:549: SettingWithCopyWarning: 
+/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_7030.py:548: SettingWithCopyWarning: 
 A value is trying to be set on a copy of a slice from a DataFrame
 
 See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
   mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
 /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
   from pandas import MultiIndex, Int64Index
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
-  warnings.warn(
 1.22.4
 1.4.1
 
@@ -114,6 +66,7 @@ No. of columns for x_features: 174
 
 ------------------------------------------------------------- 
 Successfully split data with stratification: 70/30 
+Input features data size: (119, 174) 
 Train data size: (79, 174) 
 Test data size: (40, 174) 
 y_train numbers: Counter({0: 50, 1: 29}) 
@@ -123,6 +76,18 @@ y_test_numbers: Counter({0: 26, 1: 14})
 y_test ratio: 1.8571428571428572 
 -------------------------------------------------------------
 
+index: 0 
+ind: 1
+
+Mask count check: True
+
+index: 1 
+ind: 2
+
+Mask count check: True
+Original Data
+ Counter({0: 50, 1: 29}) Data dim: (79, 174)
+
 Simple Random OverSampling
  Counter({1: 50, 0: 50})
 (100, 174)
@@ -146,11 +111,18 @@ Gene name: gid
 Drug name: streptomycin
 
 Output directory: /home/tanu/git/Data/streptomycin/output/ml/tts_7030/
+
 Sanity checks: 
-ML source data size: (119, 174) 
-Total input features: (79, 174) 
-Target feature numbers: Counter({0: 50, 1: 29}) 
-Target features ratio: 1.7241379310344827 
+Total input features: 174 
+ 
+Training data size: (79, 174) 
+Test data size: (40, 174) 
+ 
+Target feature numbers (training data): Counter({0: 50, 1: 29}) 
+Target features ratio (training data: 1.7241379310344827 
+ 
+Target feature numbers (test data): Counter({0: 26, 1: 14}) 
+Target features ratio (test data): 1.8571428571428572 
 
 #####################################################################
 
@@ -165,6 +137,8 @@ Other struc columns: ['rsa', 'kd_values', 'rd_values']
 ================================================================
 
 AAindex features (n): 123 
+These are:
+ ['ALTS910101', 'AZAE970101', 'AZAE970102', 'BASU010101', 'BENS940101', 'BENS940102', 'BENS940103', 'BENS940104', 'BETM990101', 'BLAJ010101', 'BONM030101', 'BONM030102', 'BONM030103', 'BONM030104', 'BONM030105', 'BONM030106', 'BRYS930101', 'CROG050101', 'CSEM940101', 'DAYM780301', 'DAYM780302', 'DOSZ010101', 'DOSZ010102', 'DOSZ010103', 'DOSZ010104', 'FEND850101', 'FITW660101', 'GEOD900101', 'GIAG010101', 'GONG920101', 'GRAR740104', 'HENS920101', 'HENS920102', 'HENS920103', 'HENS920104', 'JOHM930101', 'JOND920103', 'JOND940101', 'KANM000101', 'KAPO950101', 'KESO980101', 'KESO980102', 'KOLA920101', 'KOLA930101', 'KOSJ950100_RSA_SST', 'KOSJ950100_SST', 'KOSJ950110_RSA', 'KOSJ950115', 'LEVJ860101', 'LINK010101', 'LIWA970101', 'LUTR910101', 'LUTR910102', 'LUTR910103', 'LUTR910104', 'LUTR910105', 'LUTR910106', 'LUTR910107', 'LUTR910108', 'LUTR910109', 'MCLA710101', 'MCLA720101', 'MEHP950102', 'MICC010101', 'MIRL960101', 'MIYS850102', 'MIYS850103', 'MIYS930101', 'MIYS960101', 'MIYS960102', 'MIYS960103', 'MIYS990106', 'MIYS990107', 'MIYT790101', 'MOHR870101', 'MOOG990101', 'MUET010101', 'MUET020101', 'MUET020102', 'NAOD960101', 'NGPC000101', 'NIEK910101', 'NIEK910102', 'OGAK980101', 'OVEJ920100_RSA', 'OVEJ920101', 'OVEJ920102', 'OVEJ920103', 'PRLA000101', 'PRLA000102', 'QUIB020101', 'QU_C930101', 'QU_C930102', 'QU_C930103', 'RIER950101', 'RISJ880101', 'RUSR970101', 'RUSR970102', 'RUSR970103', 'SIMK990101', 'SIMK990102', 'SIMK990103', 'SIMK990104', 'SIMK990105', 'SKOJ000101', 'SKOJ000102', 'SKOJ970101', 'TANS760101', 'TANS760102', 'THOP960101', 'TOBD000101', 'TOBD000102', 'TUDE900101', 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'] 
 ================================================================
 
 Evolutionary features (n): 3 
@@ -191,160 +165,7 @@ Pass: No. of features match
 
 Model_name: Logistic Regression 
 Model func: LogisticRegression(random_state=42) 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
-              colsample_bynode=None, colsample_bytree=None,
-              enable_categorical=False, gamma=None, gpu_id=None,
-              importance_type=None, interaction_constraints=None,
-              learning_rate=None, max_delta_step=None, max_depth=None,
-              min_child_weight=None, missing=nan, monotone_constraints=None,
-              n_estimators=100, n_jobs=None, num_parallel_tree=None,
-              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
-              scale_pos_weight=None, subsample=None, tree_method=None,
-              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model', LogisticRegression(random_state=42))])
-
-key: fit_time 
-value: [0.02347183 0.02686572 0.0307076  0.02450418 0.02523112 0.02600336
- 0.02457237 0.02707028 0.02396798 0.02385616]
-
-mean value: 0.025625061988830567
-
-key: score_time 
-value: [0.01185417 0.00754213 0.01175284 0.01153183 0.01146317 0.01155972
- 0.01150918 0.01154113 0.01146698 0.01157117]
-
-mean value: 0.011179232597351074
-
-key: test_mcc 
-value: [ 0.48795004         nan  0.46666667  0.46666667  0.74535599  0.6
-  0.77459667 -0.29277002  0.74535599  0.09128709]
-
-mean value: nan
-
-key: train_mcc 
-value: [0.91067388 0.88152145 0.90865445 0.90865445 0.90865445 0.87863248
- 0.84744528 0.87830162 0.87830162 0.91085367]
-
-mean value: 0.8911693370709752
-
-key: test_accuracy 
-value: [0.75              nan 0.75       0.75       0.875      0.75
- 0.875      0.5        0.875      0.57142857]
-
-mean value: nan
-
-key: train_accuracy 
-value: [0.95774648 0.94366197 0.95774648 0.95774648 0.95774648 0.94366197
- 0.92957746 0.94366197 0.94366197 0.95833333]
-
-mean value: 0.9493544600938967
-
-key: test_fscore 
-value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
 Traceback (most recent call last):
   File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
     return cache[method]
@@ -392,407 +213,72 @@ Traceback (most recent call last):
 ValueError: Found unknown categories ['Other'] in column 5 during transform
 
   warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
+              colsample_bynode=None, colsample_bytree=None,
+              enable_categorical=False, gamma=None, gpu_id=None,
+              importance_type=None, interaction_constraints=None,
+              learning_rate=None, max_delta_step=None, max_depth=None,
+              min_child_weight=None, missing=nan, monotone_constraints=None,
+              n_estimators=100, n_jobs=None, num_parallel_tree=None,
+              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
+              scale_pos_weight=None, subsample=None, tree_method=None,
+              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model', LogisticRegression(random_state=42))])
 
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+key: fit_time 
+value: [0.0366919  0.03364944 0.04210711 0.02675176 0.02643323 0.02680922
+ 0.02558064 0.02712679 0.02658963 0.02430034]
 
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+mean value: 0.029604005813598632
 
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+key: score_time 
+value: [0.01219821 0.00790191 0.01219344 0.01198339 0.01178885 0.01182723
+ 0.01177597 0.01183701 0.01194477 0.01177979]
 
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+mean value: 0.011523056030273437
 
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+key: test_mcc 
+value: [ 0.48795004         nan  0.46666667  0.46666667  0.74535599  0.6
+  0.77459667 -0.29277002  0.74535599  0.09128709]
 
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+mean value: nan
 
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+key: train_mcc 
+value: [0.91067388 0.88152145 0.90865445 0.90865445 0.90865445 0.87863248
+ 0.84744528 0.87830162 0.87830162 0.91085367]
 
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+mean value: 0.8911693370709752
 
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+key: test_accuracy 
+value: [0.75              nan 0.75       0.75       0.875      0.75
+ 0.875      0.5        0.875      0.57142857]
 
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+mean value: nan
 
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+key: train_accuracy 
+value: [0.95774648 0.94366197 0.95774648 0.95774648 0.95774648 0.94366197
+ 0.92957746 0.94366197 0.94366197 0.95833333]
 
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+mean value: 0.9493544600938967
 
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-[0.5               nan 0.66666667 0.66666667 0.8        0.75
+key: test_fscore 
+value: [0.5               nan 0.66666667 0.66666667 0.8        0.75
  0.85714286 0.         0.8        0.4       ]
 
 mean value: nan
@@ -869,7 +355,591 @@ List of models: [('Logistic Regression', LogisticRegression(random_state=42)), (
               predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
               scale_pos_weight=None, subsample=None, tree_method=None,
               use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
+Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+Pipeline(steps=[('prep',
                  ColumnTransformer(remainder='passthrough',
                                    transformers=[('num', MinMaxScaler(),
                                                   Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
@@ -886,16 +956,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', LogisticRegressionCV(random_state=42))])
 
 key: fit_time 
-value: [0.53741789 0.69267988 0.48575902 0.56827617 0.53399444 0.59227157
- 0.52956486 0.53589177 0.53715754 0.65834451]
+value: [0.61568356 0.59473109 0.68769026 0.86828899 0.86762929 0.69212842
+ 0.64828324 0.55574918 0.70834208 0.64952207]
 
-mean value: 0.5671357631683349
+mean value: 0.688804817199707
 
 key: score_time 
-value: [0.01202703 0.00642514 0.01250315 0.01604509 0.01320004 0.01179504
- 0.01307392 0.01395082 0.01368642 0.01708794]
+value: [0.01196933 0.00655389 0.01198149 0.01488495 0.01500368 0.012532
+ 0.01223516 0.01520872 0.01664925 0.01557779]
 
-mean value: 0.012979459762573243
+mean value: 0.013259625434875489
 
 key: test_mcc 
 value: [0.1490712         nan 0.46666667 0.46666667 0.46666667 0.6
@@ -986,7 +1056,88 @@ Accuracy on Blind test: 0.72
 
 Model_name: Gaussian NB 
 Model func: GaussianNB() 
-List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
+              colsample_bynode=None, colsample_bytree=None,
+              enable_categorical=False, gamma=None, gpu_id=None,
+              importance_type=None, interaction_constraints=None,
+              learning_rate=None, max_delta_step=None, max_depth=None,
+              min_child_weight=None, missing=nan, monotone_constraints=None,
+              n_estimators=100, n_jobs=None, num_parallel_tree=None,
+              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
+              scale_pos_weight=None, subsample=None, tree_method=None,
+              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model', GaussianNB())])
+
+key: fit_time 
+value: [0.01205611 0.01126623 0.00906897 0.00851703 0.008286   0.00828934
+ 0.00834227 0.00843191 0.00838542 0.00838542]
+
+mean value: 0.009102869033813476
+
+key: score_time 
+value: [0.01170921 0.00516486 0.00907874 0.00863242 0.00857592 0.00856733
+ 0.00852275 0.0085392  0.00850701 0.00861979]
+
+mean value: 0.008591723442077637
+
+key: test_mcc 
+value: [-0.06666667         nan  0.06666667 -0.25819889  0.          0.6
+  0.06666667 -0.06666667 -0.46666667  0.54772256]
+
+mean value: nan
+
+key: train_mcc 
+value: [0.63589744 0.3217793  0.4760037  0.61337378 0.48136848 0.59111411
+ 0.59101806 0.61021596 0.61021596 0.61560271]
+
+mean value: 0.554658949505139
+
+key: test_accuracy 
+value: [0.5               nan 0.5        0.375      0.375      0.75
+ 0.5        0.5        0.25       0.71428571]
+
+mean value: nan
+
+key: train_accuracy 
+value: [0.83098592 0.54929577 0.64788732 0.76056338 0.67605634 0.77464789
+ 0.76056338 0.77464789 0.77464789 0.77777778]
+
+mean value: 0.7327073552425665
+
+key: test_fscore 
+value: [0.33333333        nan 0.5        0.28571429 0.54545455 0.75
+ 0.5        0.33333333 0.25       0.66666667]
+
+mean value: nan
+
+key: train_fscore 
+value: [0.76923077 0.6097561  0.67532468 0.75362319 0.68493151 0.75
+ 0.74626866 0.75757576 0.75757576 0.76470588]
+
+mean value: 0.7268992291592407
+
+key: test_precision 
+value: [0.33333333        nan 0.4        0.25       0.375      0.6
+ 0.4        0.33333333 0.2        0.5       ]
+
+mean value: nan
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
 Traceback (most recent call last):
   File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
     return cache[method]
@@ -1034,87 +1185,8 @@ Traceback (most recent call last):
 ValueError: Found unknown categories ['Other'] in column 5 during transform
 
   warnings.warn(
-[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
-              colsample_bynode=None, colsample_bytree=None,
-              enable_categorical=False, gamma=None, gpu_id=None,
-              importance_type=None, interaction_constraints=None,
-              learning_rate=None, max_delta_step=None, max_depth=None,
-              min_child_weight=None, missing=nan, monotone_constraints=None,
-              n_estimators=100, n_jobs=None, num_parallel_tree=None,
-              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
-              scale_pos_weight=None, subsample=None, tree_method=None,
-              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model', GaussianNB())])
-
-key: fit_time 
-value: [0.01300025 0.01131201 0.01010251 0.00901103 0.00829911 0.00847673
- 0.00845861 0.00850534 0.00863886 0.00855303]
-
-mean value: 0.009435749053955078
-
-key: score_time 
-value: [0.01224828 0.00450897 0.00966024 0.00870657 0.00878048 0.00860023
- 0.00848126 0.00857925 0.00853801 0.00865126]
-
-mean value: 0.008675456047058105
-
-key: test_mcc 
-value: [-0.06666667         nan  0.06666667 -0.25819889  0.          0.6
-  0.06666667 -0.06666667 -0.46666667  0.54772256]
-
-mean value: nan
-
-key: train_mcc 
-value: [0.63589744 0.3217793  0.4760037  0.61337378 0.48136848 0.59111411
- 0.59101806 0.61021596 0.61021596 0.61560271]
-
-mean value: 0.554658949505139
-
-key: test_accuracy 
-value: [0.5               nan 0.5        0.375      0.375      0.75
- 0.5        0.5        0.25       0.71428571]
-
-mean value: nan
-
-key: train_accuracy 
-value: [0.83098592 0.54929577 0.64788732 0.76056338 0.67605634 0.77464789
- 0.76056338 0.77464789 0.77464789 0.77777778]
-
-mean value: 0.7327073552425665
-
-key: test_fscore 
-value: [0.33333333        nan 0.5        0.28571429 0.54545455 0.75
- 0.5        0.33333333 0.25       0.66666667]
-
-mean value: nan
-
-key: train_fscore 
-value: [0.76923077 0.6097561  0.67532468 0.75362319 0.68493151 0.75
- 0.74626866 0.75757576 0.75757576 0.76470588]
-
-mean value: 0.7268992291592407
-
-key: test_precision 
-value: [0.33333333        nan 0.4        0.25       0.375      0.6
- 0.4        0.33333333 0.2        0.5       ]
-
-mean value: nan
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
+  _warn_prf(average, modifier, msg_start, len(result))
 
 key: train_precision 
 value: [0.76923077 0.44642857 0.50980392 0.60465116 0.53191489 0.63157895
@@ -1176,107 +1248,7 @@ List of models: [('Logistic Regression', LogisticRegression(random_state=42)), (
               predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
               scale_pos_weight=None, subsample=None, tree_method=None,
               use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
-  _warn_prf(average, modifier, msg_start, len(result))
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
-  _warn_prf(average, modifier, msg_start, len(result))
-Pipeline(steps=[('prep',
+Running model pipeline: Pipeline(steps=[('prep',
                  ColumnTransformer(remainder='passthrough',
                                    transformers=[('num', MinMaxScaler(),
                                                   Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
@@ -1293,16 +1265,16 @@ Pipeline(steps=[('prep',
                 ('model', BernoulliNB())])
 
 key: fit_time 
-value: [0.00992274 0.00908804 0.01092887 0.00855708 0.00939322 0.00976706
- 0.00851512 0.00860643 0.00846624 0.00871468]
+value: [0.00886774 0.00868058 0.00883889 0.00872326 0.00893021 0.00871801
+ 0.00855303 0.00864196 0.00846744 0.00878453]
 
-mean value: 0.009195947647094726
+mean value: 0.00872056484222412
 
 key: score_time 
-value: [0.01040363 0.00474834 0.00870204 0.00912285 0.00954747 0.0092864
- 0.00849009 0.00853539 0.00894189 0.00856447]
+value: [0.00870299 0.0042479  0.00908852 0.00942707 0.00868988 0.00882626
+ 0.00883055 0.00864148 0.00859904 0.0089457 ]
 
-mean value: 0.008634257316589355
+mean value: 0.00839993953704834
 
 key: test_mcc 
 value: [-0.29277002         nan  0.1490712   0.          0.46666667  0.25819889
@@ -1393,84 +1365,7 @@ Accuracy on Blind test: 0.62
 
 Model_name: K-Nearest Neighbors 
 Model func: KNeighborsClassifier() 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
-              colsample_bynode=None, colsample_bytree=None,
-              enable_categorical=False, gamma=None, gpu_id=None,
-              importance_type=None, interaction_constraints=None,
-              learning_rate=None, max_delta_step=None, max_depth=None,
-              min_child_weight=None, missing=nan, monotone_constraints=None,
-              n_estimators=100, n_jobs=None, num_parallel_tree=None,
-              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
-              scale_pos_weight=None, subsample=None, tree_method=None,
-              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model', KNeighborsClassifier())])
-
-key: fit_time 
-value: [0.00953317 0.01132226 0.00892282 0.00931168 0.00937891 0.00864029
- 0.00916529 0.00923681 0.00909519 0.00845647]
-
-mean value: 0.00930628776550293
-
-key: score_time 
-value: [0.04363441 0.00601792 0.01144981 0.01010704 0.00944614 0.00927305
- 0.00956726 0.01000333 0.014678   0.00946236]
-
-mean value: 0.013363933563232422
-
-key: test_mcc 
-value: [ 0.48795004         nan  0.46666667  0.48795004  0.46666667  0.25819889
-  0.48795004  0.         -0.4472136   0.64549722]
-
-mean value: nan
-
-key: train_mcc 
-value: [0.39440661 0.50503962 0.35808137 0.42968701 0.50503962 0.51530373
- 0.4660252  0.49787306 0.56963094 0.52098273]
-
-mean value: 0.47620698672135525
-
-key: test_accuracy 
-value: [0.75              nan 0.75       0.75       0.75       0.625
- 0.75       0.625      0.375      0.85714286]
-
-mean value: nan
-
-key: train_accuracy 
-value: [0.73239437 0.77464789 0.71830986 0.74647887 0.77464789 0.77464789
- 0.76056338 0.77464789 0.8028169  0.77777778]
-
-mean value: 0.7636932707355243
-
-key: test_fscore 
-value: [0.5               nan 0.66666667 0.5        0.66666667 0.57142857
- 0.5        0.         0.         0.66666667]
-
-mean value: nan
-
-key: train_fscore 
-value: [0.53658537 0.6        0.5        0.57142857 0.6        0.57894737
- 0.58536585 0.63636364 0.66666667 0.61904762]
-
-mean value: 0.5894405081439741
-
-key: test_precision 
-value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
 Traceback (most recent call last):
   File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
     return cache[method]
@@ -1520,9 +1415,84 @@ ValueError: Found unknown categories ['Other'] in column 5 during transform
   warnings.warn(
 /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
   _warn_prf(average, modifier, msg_start, len(result))
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
-  _warn_prf(average, modifier, msg_start, len(result))
-[1.                nan 0.66666667 1.         0.66666667 0.5
+[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
+              colsample_bynode=None, colsample_bytree=None,
+              enable_categorical=False, gamma=None, gpu_id=None,
+              importance_type=None, interaction_constraints=None,
+              learning_rate=None, max_delta_step=None, max_depth=None,
+              min_child_weight=None, missing=nan, monotone_constraints=None,
+              n_estimators=100, n_jobs=None, num_parallel_tree=None,
+              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
+              scale_pos_weight=None, subsample=None, tree_method=None,
+              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model', KNeighborsClassifier())])
+
+key: fit_time 
+value: [0.00874114 0.01144648 0.0086658  0.00911021 0.00933123 0.00923896
+ 0.00934291 0.00915861 0.00966763 0.00907493]
+
+mean value: 0.009377789497375489
+
+key: score_time 
+value: [0.0487206  0.006109   0.01472044 0.01498747 0.0102694  0.01016641
+ 0.01003814 0.01018739 0.01121283 0.0092566 ]
+
+mean value: 0.014566826820373534
+
+key: test_mcc 
+value: [ 0.48795004         nan  0.46666667  0.48795004  0.46666667  0.25819889
+  0.48795004  0.         -0.4472136   0.64549722]
+
+mean value: nan
+
+key: train_mcc 
+value: [0.39440661 0.50503962 0.35808137 0.42968701 0.50503962 0.51530373
+ 0.4660252  0.49787306 0.56963094 0.52098273]
+
+mean value: 0.47620698672135525
+
+key: test_accuracy 
+value: [0.75              nan 0.75       0.75       0.75       0.625
+ 0.75       0.625      0.375      0.85714286]
+
+mean value: nan
+
+key: train_accuracy 
+value: [0.73239437 0.77464789 0.71830986 0.74647887 0.77464789 0.77464789
+ 0.76056338 0.77464789 0.8028169  0.77777778]
+
+mean value: 0.7636932707355243
+
+key: test_fscore 
+value: [0.5               nan 0.66666667 0.5        0.66666667 0.57142857
+ 0.5        0.         0.         0.66666667]
+
+mean value: nan
+
+key: train_fscore 
+value: [0.53658537 0.6        0.5        0.57142857 0.6        0.57894737
+ 0.58536585 0.63636364 0.66666667 0.61904762]
+
+mean value: 0.5894405081439741
+
+key: test_precision 
+value: [1.                nan 0.66666667 1.         0.66666667 0.5
  1.         0.         0.         1.        ]
 
 mean value: nan
@@ -1587,7 +1557,107 @@ List of models: [('Logistic Regression', LogisticRegression(random_state=42)), (
               predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
               scale_pos_weight=None, subsample=None, tree_method=None,
               use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
+Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
+  _warn_prf(average, modifier, msg_start, len(result))
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
+  _warn_prf(average, modifier, msg_start, len(result))
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+Pipeline(steps=[('prep',
                  ColumnTransformer(remainder='passthrough',
                                    transformers=[('num', MinMaxScaler(),
                                                   Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
@@ -1604,16 +1674,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', SVC(random_state=42))])
 
 key: fit_time 
-value: [0.00903344 0.0091846  0.00961351 0.00917578 0.0096128  0.00965309
- 0.00942969 0.0092864  0.00940824 0.00987148]
+value: [0.01034451 0.0102098  0.00897241 0.00901365 0.0088799  0.00927591
+ 0.00998259 0.01031137 0.01042175 0.01003623]
 
-mean value: 0.00942690372467041
+mean value: 0.009744811058044433
 
 key: score_time 
-value: [0.00883532 0.00442553 0.00902295 0.00915003 0.00871062 0.00882101
- 0.00899625 0.00945115 0.00905657 0.00935435]
+value: [0.0098021  0.00447941 0.0090816  0.00874925 0.00895143 0.00880194
+ 0.0096159  0.00961876 0.00949502 0.00887012]
 
-mean value: 0.008582377433776855
+mean value: 0.00874655246734619
 
 key: test_mcc 
 value: [0.48795004        nan 0.48795004 0.48795004 0.48795004 0.74535599
@@ -1702,55 +1772,7 @@ Accuracy on Blind test: 0.68
 
 Model_name: MLP 
 Model func: MLPClassifier(max_iter=500, random_state=42) 
-List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
-  warnings.warn(
-[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
                        n_estimators=1000, n_jobs=10, oob_score=True,
                        random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
               colsample_bynode=None, colsample_bytree=None,
@@ -1779,16 +1801,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', MLPClassifier(max_iter=500, random_state=42))])
 
 key: fit_time 
-value: [0.36889744 0.38598275 0.39493704 0.38300252 0.44200897 0.40054107
- 0.43577385 0.40177441 0.4005568  0.47488117]
+value: [0.55261087 0.37978768 0.40832138 0.53674722 0.38702536 0.39668012
+ 0.45907712 0.38502288 0.56192374 0.36470985]
 
-mean value: 0.40883560180664064
+mean value: 0.4431906223297119
 
 key: score_time 
-value: [0.01228499 0.00684047 0.01225257 0.0120151  0.0121727  0.0118804
- 0.01362801 0.01195502 0.01198483 0.01258683]
+value: [0.0121274  0.00678778 0.01212931 0.01215839 0.01212978 0.01212168
+ 0.01210904 0.01213956 0.01217985 0.01214075]
 
-mean value: 0.011760091781616211
+mean value: 0.011602354049682618
 
 key: test_mcc 
 value: [ 0.48795004         nan  0.46666667  0.25819889  0.46666667  0.6
@@ -1846,7 +1868,55 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_roc_auc 
-value: [0.66666667        nan 0.73333333 0.63333333 0.73333333 0.8
+value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+[0.66666667        nan 0.73333333 0.63333333 0.73333333 0.8
  0.9        0.4        0.73333333 0.45      ]
 
 mean value: nan
@@ -1885,103 +1955,7 @@ List of models: [('Logistic Regression', LogisticRegression(random_state=42)), (
               predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
               scale_pos_weight=None, subsample=None, tree_method=None,
               use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
-  warnings.warn(
-Pipeline(steps=[('prep',
+Running model pipeline: Pipeline(steps=[('prep',
                  ColumnTransformer(remainder='passthrough',
                                    transformers=[('num', MinMaxScaler(),
                                                   Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
@@ -1998,16 +1972,16 @@ Pipeline(steps=[('prep',
                 ('model', DecisionTreeClassifier(random_state=42))])
 
 key: fit_time 
-value: [0.01571584 0.01269054 0.01136661 0.01086378 0.00994062 0.00986242
- 0.01026821 0.01012969 0.00951695 0.01019168]
+value: [0.0133059  0.01309037 0.01105928 0.00985241 0.00974655 0.00949502
+ 0.00951028 0.00971913 0.00915694 0.00968456]
 
-mean value: 0.011054635047912598
+mean value: 0.010462045669555664
 
 key: score_time 
-value: [0.01275587 0.00504398 0.00905156 0.00882149 0.00875378 0.00857162
- 0.00925541 0.00867009 0.00854111 0.00858116]
+value: [0.01178765 0.00480032 0.00885653 0.00857639 0.00867677 0.00849724
+ 0.00837517 0.00841808 0.0084424  0.00845408]
 
-mean value: 0.008804607391357421
+mean value: 0.00848846435546875
 
 key: test_mcc 
 value: [0.25819889        nan 0.77459667 0.77459667 1.         0.6
@@ -2103,7 +2077,103 @@ List of models: [('Logistic Regression', LogisticRegression(random_state=42)), (
               predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
               scale_pos_weight=None, subsample=None, tree_method=None,
               use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
+Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+Pipeline(steps=[('prep',
                  ColumnTransformer(remainder='passthrough',
                                    transformers=[('num', MinMaxScaler(),
                                                   Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
@@ -2120,16 +2190,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', ExtraTreesClassifier(random_state=42))])
 
 key: fit_time 
-value: [0.08180642 0.08224797 0.08108139 0.08005643 0.08400655 0.08195567
- 0.083637   0.08279467 0.08375001 0.08794379]
+value: [0.07962322 0.07938647 0.0790019  0.07999277 0.07923889 0.0796628
+ 0.0828433  0.08033228 0.08008289 0.08019423]
 
-mean value: 0.0829279899597168
+mean value: 0.0800358772277832
 
 key: score_time 
-value: [0.01682615 0.00449514 0.01753378 0.01695251 0.01697278 0.01840544
- 0.01810956 0.01845503 0.01751947 0.01944685]
+value: [0.0167439  0.00441599 0.01676273 0.01670766 0.01685452 0.01676798
+ 0.01670623 0.0169127  0.01742005 0.01756716]
 
-mean value: 0.016471672058105468
+mean value: 0.015685892105102538
 
 key: test_mcc 
 value: [ 0.48795004         nan  0.74535599 -0.06666667  0.48795004  0.6
@@ -2214,55 +2284,7 @@ Accuracy on Blind test: 0.7
 
 Model_name: Extra Tree 
 Model func: ExtraTreeClassifier(random_state=42) 
-List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
-  warnings.warn(
-[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
                        n_estimators=1000, n_jobs=10, oob_score=True,
                        random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
               colsample_bynode=None, colsample_bytree=None,
@@ -2291,16 +2313,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', ExtraTreeClassifier(random_state=42))])
 
 key: fit_time 
-value: [0.00917554 0.00918388 0.01030898 0.00913    0.00861025 0.00867605
- 0.00893021 0.00875688 0.00861931 0.00882649]
+value: [0.00855803 0.00837731 0.0085175  0.00922489 0.00838661 0.0093317
+ 0.00823355 0.00841951 0.00875092 0.00846076]
 
-mean value: 0.009021759033203125
+mean value: 0.008626079559326172
 
 key: score_time 
-value: [0.00911689 0.00463772 0.00918674 0.00968862 0.00868392 0.00932837
- 0.00896454 0.00857091 0.00860333 0.00897813]
+value: [0.0088222  0.00437546 0.00849724 0.00917506 0.00854778 0.00906491
+ 0.00846219 0.0085783  0.00849533 0.00848794]
 
-mean value: 0.008575916290283203
+mean value: 0.00825064182281494
 
 key: test_mcc 
 value: [ 0.1490712          nan  0.46666667 -0.06666667  0.1490712   1.
@@ -2383,20 +2405,7 @@ MCC on Blind test: 0.28
 Accuracy on Blind test: 0.6
 
 Model_name: Random Forest 
-Model func: RandomForestClassifier(n_estimators=1000, random_state=42) 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
-              colsample_bynode=None, colsample_bytree=None,
-              enable_categorical=False, gamma=None, gpu_id=None,
-              importance_type=None, interaction_constraints=None,
-              learning_rate=None, max_delta_step=None, max_depth=None,
-              min_child_weight=None, missing=nan, monotone_constraints=None,
-              n_estimators=100, n_jobs=None, num_parallel_tree=None,
-              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
-              scale_pos_weight=None, subsample=None, tree_method=None,
-              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Model func: RandomForestClassifier(n_estimators=1000, random_state=42) /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
 Traceback (most recent call last):
   File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
     return cache[method]
@@ -2444,9 +2453,20 @@ Traceback (most recent call last):
 ValueError: Found unknown categories ['Other'] in column 5 during transform
 
   warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
-  warn(
-Pipeline(steps=[('prep',
+
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
+              colsample_bynode=None, colsample_bytree=None,
+              enable_categorical=False, gamma=None, gpu_id=None,
+              importance_type=None, interaction_constraints=None,
+              learning_rate=None, max_delta_step=None, max_depth=None,
+              min_child_weight=None, missing=nan, monotone_constraints=None,
+              n_estimators=100, n_jobs=None, num_parallel_tree=None,
+              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
+              scale_pos_weight=None, subsample=None, tree_method=None,
+              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: Pipeline(steps=[('prep',
                  ColumnTransformer(remainder='passthrough',
                                    transformers=[('num', MinMaxScaler(),
                                                   Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
@@ -2464,16 +2484,16 @@ Pipeline(steps=[('prep',
                  RandomForestClassifier(n_estimators=1000, random_state=42))])
 
 key: fit_time 
-value: [1.00864148 1.00945377 1.01145029 1.02092719 1.02262855 1.02108669
- 1.03264499 1.03610301 0.99609876 0.99602723]
+value: [0.99221253 1.00797582 1.01229548 1.01305676 0.99885559 0.99018335
+ 0.99298692 0.98534179 0.99022436 0.99618149]
 
-mean value: 1.0155061960220337
+mean value: 0.9979314088821412
 
 key: score_time 
-value: [0.09197426 0.00447416 0.09149528 0.14959788 0.09277177 0.09351969
- 0.09247947 0.09435916 0.08660769 0.09346843]
+value: [0.09150147 0.00473714 0.09475374 0.14177513 0.09324384 0.088516
+ 0.09004092 0.0887692  0.08675432 0.09039307]
 
-mean value: 0.08907477855682373
+mean value: 0.08704848289489746
 
 key: test_mcc 
 value: [ 0.48795004         nan  0.74535599  0.25819889  0.48795004  0.77459667
@@ -2572,7 +2592,9 @@ List of models: [('Logistic Regression', LogisticRegression(random_state=42)), (
               predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
               scale_pos_weight=None, subsample=None, tree_method=None,
               use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
+Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
+  warn(
+Pipeline(steps=[('prep',
                  ColumnTransformer(remainder='passthrough',
                                    transformers=[('num', MinMaxScaler(),
                                                   Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
@@ -2710,16 +2732,16 @@ ValueError: Found unknown categories ['Other'] in column 5 during transform
   _warn_prf(average, modifier, msg_start, len(result))
 
 key: fit_time 
-value: [1.67271042 0.90515995 0.86359429 0.90234375 0.96636534 0.87128806
- 0.8657763  0.87282944 0.88145161 0.83073807]
+value: [1.71230912 0.82662535 0.84278703 0.86609745 0.93222427 0.88134384
+ 0.90477657 0.81548691 0.85592294 0.81256628]
 
-mean value: 0.9632257223129272
+mean value: 0.9450139760971069
 
 key: score_time 
-value: [0.21594787 0.00481105 0.17994237 0.17904091 0.21373725 0.20884776
- 0.17252564 0.20995092 0.20395184 0.22969842]
+value: [0.17363358 0.00458622 0.17959046 0.21361065 0.21203494 0.17785597
+ 0.22314191 0.22879744 0.1943121  0.22552323]
 
-mean value: 0.18184540271759034
+mean value: 0.18330864906311034
 
 key: test_mcc 
 value: [0.48795004        nan 0.46666667 0.48795004 0.48795004 0.74535599
@@ -2839,16 +2861,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', BernoulliNB())])
 
 key: fit_time 
-value: [0.00874162 0.00872374 0.00922036 0.00902081 0.00892901 0.00882077
- 0.0089941  0.00903893 0.00934553 0.00889754]
+value: [0.00989461 0.00995517 0.01037693 0.00869536 0.00957036 0.00846767
+ 0.00956583 0.00920486 0.00919366 0.00914788]
 
-mean value: 0.008973240852355957
+mean value: 0.009407234191894532
 
 key: score_time 
-value: [0.00875211 0.00437403 0.00965929 0.00880575 0.00879216 0.00873375
- 0.00872302 0.00874352 0.00864363 0.0090158 ]
+value: [0.00942397 0.00489521 0.00996447 0.0094893  0.00896859 0.00871682
+ 0.00867128 0.00852704 0.00943971 0.00916314]
 
-mean value: 0.00842430591583252
+mean value: 0.008725953102111817
 
 key: test_mcc 
 value: [-0.29277002         nan  0.1490712   0.          0.46666667  0.25819889
@@ -3029,16 +3051,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                                validate_parameters=None, verbosity=0))])
 
 key: fit_time 
-value: [0.08834314 0.03863764 0.04068565 0.0569241  0.07989025 0.09637451
- 0.07425714 0.03369784 0.03540182 0.0447278 ]
+value: [0.14565539 0.03244257 0.05331469 0.03653431 0.03764868 0.03769803
+ 0.0387702  0.03797412 0.03629756 0.06974673]
 
-mean value: 0.058893990516662595
+mean value: 0.05260822772979736
 
 key: score_time 
-value: [0.01159501 0.00512886 0.0114882  0.01043797 0.01063323 0.01238704
- 0.01091051 0.01177049 0.01157951 0.01068592]
+value: [0.01081181 0.00494266 0.01059127 0.01046586 0.01142001 0.01110435
+ 0.01143312 0.01069999 0.01111579 0.01013684]
 
-mean value: 0.010661673545837403
+mean value: 0.01027216911315918
 
 key: test_mcc 
 value: [0.74535599        nan 0.74535599 1.         1.         0.6
@@ -3199,16 +3221,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', LinearDiscriminantAnalysis())])
 
 key: fit_time 
-value: [0.02861381 0.01710796 0.01704741 0.02752972 0.01703906 0.0173142
- 0.0171032  0.01710773 0.01708961 0.01672173]
+value: [0.02565622 0.01628876 0.04023051 0.03998733 0.04027724 0.03976345
+ 0.03319478 0.03937459 0.03932738 0.03963566]
 
-mean value: 0.01926743984222412
+mean value: 0.035373592376708986
 
 key: score_time 
-value: [0.01201081 0.00585961 0.01186156 0.0116775  0.01184201 0.01153588
- 0.01147604 0.01150894 0.01155281 0.01160359]
+value: [0.01197481 0.00604296 0.02297831 0.0204556  0.02054095 0.02182841
+ 0.02347851 0.02111363 0.02293038 0.02010679]
 
-mean value: 0.011092877388000489
+mean value: 0.01914503574371338
 
 key: test_mcc 
 value: [ 0.77459667         nan  0.77459667  0.25819889  0.1490712   0.25819889
@@ -3424,16 +3446,16 @@ Pipeline(steps=[('prep',
                 ('model', MultinomialNB())])
 
 key: fit_time 
-value: [0.01906657 0.00880098 0.0084815  0.00850773 0.00844955 0.0085187
- 0.00853658 0.00868392 0.00857282 0.00848532]
+value: [0.01452279 0.00926256 0.01047921 0.00929594 0.00924563 0.00948405
+ 0.00887465 0.00973344 0.00907183 0.00911689]
 
-mean value: 0.009610366821289063
+mean value: 0.009908699989318847
 
 key: score_time 
-value: [0.00889063 0.00434232 0.00853539 0.00835586 0.00840497 0.00841403
- 0.00842023 0.00843906 0.00841045 0.00846195]
+value: [0.00940919 0.00476885 0.00952935 0.00888324 0.00898743 0.00851965
+ 0.00895286 0.00930977 0.00942397 0.00866628]
 
-mean value: 0.008067488670349121
+mean value: 0.008645057678222656
 
 key: test_mcc 
 value: [-0.06666667         nan  0.74535599  0.1490712   0.74535599  0.48795004
@@ -3555,16 +3577,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                  PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
 
 key: fit_time 
-value: [0.00974989 0.01256609 0.01255202 0.01197124 0.01251864 0.01321697
- 0.01367021 0.012501   0.01297402 0.01391482]
+value: [0.01042056 0.01261353 0.01291299 0.0122683  0.01264381 0.01341867
+ 0.01380587 0.02929306 0.03145814 0.01436043]
 
-mean value: 0.012563490867614746
+mean value: 0.016319537162780763
 
 key: score_time 
-value: [0.00909543 0.00559807 0.01082706 0.01195002 0.01125741 0.01126671
- 0.01122785 0.0112493  0.01124573 0.01133037]
+value: [0.00941873 0.00572133 0.01120472 0.01170421 0.01149988 0.0115881
+ 0.01157069 0.01993561 0.01752448 0.01183438]
 
-mean value: 0.010504794120788575
+mean value: 0.012200212478637696
 
 key: test_mcc 
 value: [ 0.48795004         nan  0.46666667  0.25819889  0.46666667  0.29277002
@@ -3733,16 +3755,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', SGDClassifier(n_jobs=10, random_state=42))])
 
 key: fit_time 
-value: [0.01293874 0.01226783 0.01234913 0.01200628 0.01229644 0.01227403
- 0.0121088  0.01211166 0.01241088 0.01239038]
+value: [0.01315737 0.01237726 0.01252127 0.01203179 0.01235271 0.01230645
+ 0.01251531 0.01241851 0.01265669 0.01256299]
 
-mean value: 0.01231541633605957
+mean value: 0.012490034103393555
 
 key: score_time 
-value: [0.00994158 0.00611711 0.01135921 0.01127291 0.01130056 0.01134491
- 0.01127958 0.01128697 0.01127529 0.01119423]
+value: [0.01045203 0.00614882 0.01162457 0.01147652 0.01155424 0.0219295
+ 0.01169086 0.01164365 0.01157975 0.01150036]
 
-mean value: 0.010637235641479493
+mean value: 0.011960029602050781
 
 key: test_mcc 
 value: [ 0.48795004         nan  0.06666667  0.46666667  0.46666667  0.77459667
@@ -3911,16 +3933,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', AdaBoostClassifier(random_state=42))])
 
 key: fit_time 
-value: [0.08456826 0.07948041 0.08607769 0.07694125 0.076864   0.07677102
- 0.07680297 0.08007669 0.07709074 0.07826424]
+value: [0.08493304 0.08175111 0.08369136 0.08218789 0.08108163 0.08307648
+ 0.08458972 0.08156514 0.08213234 0.08229351]
 
-mean value: 0.07929372787475586
+mean value: 0.08273022174835205
 
 key: score_time 
-value: [0.01530957 0.00458312 0.01483393 0.01464057 0.01444936 0.01463532
- 0.01460624 0.01483655 0.01560974 0.01539278]
+value: [0.01478672 0.00491738 0.01560259 0.01490855 0.015692   0.0162847
+ 0.01612735 0.0150218  0.01616716 0.01500249]
 
-mean value: 0.013889718055725097
+mean value: 0.014451074600219726
 
 key: test_mcc 
 value: [0.74535599        nan 0.25819889 1.         1.         0.6
@@ -4131,16 +4153,16 @@ Pipeline(steps=[('prep',
                                    random_state=42))])
 
 key: fit_time 
-value: [0.03413796 0.02626228 0.03821015 0.04624867 0.04347873 0.03817987
- 0.04379535 0.03776097 0.0358882  0.04368043]
+value: [0.03491879 0.02965927 0.05070114 0.04325414 0.03710151 0.04708791
+ 0.04051805 0.05239248 0.0487051  0.02523303]
 
-mean value: 0.03876426219940186
+mean value: 0.04095714092254639
 
 key: score_time 
-value: [0.01770973 0.00455666 0.03768253 0.02266836 0.02085519 0.02388167
- 0.03207541 0.02712274 0.03294992 0.02003694]
+value: [0.02097654 0.00495124 0.03698468 0.01774836 0.03910136 0.03681517
+ 0.02154684 0.03762245 0.01965308 0.01705694]
 
-mean value: 0.023953914642333984
+mean value: 0.02524566650390625
 
 key: test_mcc 
 value: [0.74535599        nan 0.74535599 1.         1.         0.6
@@ -4259,16 +4281,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', GaussianProcessClassifier(random_state=42))])
 
 key: fit_time 
-value: [0.01672459 0.01487875 0.01532412 0.01514101 0.01528788 0.01714444
- 0.015203   0.01521587 0.01526046 0.02377939]
+value: [0.0122931  0.01473594 0.01660824 0.01521087 0.0151813  0.01513147
+ 0.02177501 0.01516676 0.01508951 0.01509905]
 
-mean value: 0.016395950317382814
+mean value: 0.015629124641418458
 
 key: score_time 
-value: [0.01178098 0.00585222 0.01161599 0.01168609 0.01185441 0.01175952
- 0.01163292 0.01173878 0.01166463 0.01176715]
+value: [0.01127362 0.00562072 0.01160932 0.01198483 0.01171732 0.01169324
+ 0.01196361 0.01168847 0.01174283 0.01177049]
 
-mean value: 0.011135268211364745
+mean value: 0.011106443405151368
 
 key: test_mcc 
 value: [ 0.1490712          nan  0.74535599 -0.4472136   0.48795004  0.77459667
@@ -4430,16 +4452,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', GradientBoostingClassifier(random_state=42))])
 
 key: fit_time 
-value: [0.16644502 0.15082502 0.17178988 0.15492988 0.15712714 0.15647697
- 0.15715313 0.15811729 0.12830138 0.17333484]
+value: [0.16718197 0.15094709 0.1711874  0.15357113 0.15574336 0.15652514
+ 0.15600491 0.15759158 0.1278398  0.17194462]
 
-mean value: 0.1574500560760498
+mean value: 0.15685369968414306
 
 key: score_time 
-value: [0.00920653 0.00468278 0.00959659 0.00929022 0.009588   0.0092895
- 0.00926042 0.00923944 0.00946832 0.00917864]
+value: [0.00918221 0.00462985 0.00915265 0.00921178 0.00939751 0.00919819
+ 0.00938702 0.00930691 0.00974512 0.00909448]
 
-mean value: 0.008880043029785156
+mean value: 0.008830571174621582
 
 key: test_mcc 
 value: [0.74535599        nan 0.46666667 0.77459667 1.         0.6
@@ -4671,16 +4693,16 @@ Pipeline(steps=[('prep',
                 ('model', QuadraticDiscriminantAnalysis())])
 
 key: fit_time 
-value: [0.01092577 0.01285172 0.01395202 0.01353192 0.01414061 0.01355553
- 0.01369452 0.01398444 0.01363087 0.01456404]
+value: [0.01070952 0.01322937 0.01341176 0.02545214 0.02427244 0.02728081
+ 0.0287056  0.0138135  0.0139854  0.01462626]
 
-mean value: 0.013483142852783203
+mean value: 0.01854867935180664
 
 key: score_time 
-value: [0.01237583 0.00615168 0.01228261 0.01294899 0.01175308 0.01328015
- 0.01323271 0.01173949 0.01321077 0.01327252]
+value: [0.01166081 0.00594354 0.01179552 0.02356005 0.02031541 0.0121429
+ 0.01230502 0.0118804  0.01562524 0.02522063]
 
-mean value: 0.012024784088134765
+mean value: 0.015044951438903808
 
 key: test_mcc 
 value: [-0.46666667         nan -0.29277002 -0.29277002 -0.25819889  0.1490712
@@ -4793,16 +4815,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', RidgeClassifier(random_state=42))])
 
 key: fit_time 
-value: [0.02124691 0.01254821 0.01597905 0.03487706 0.01338506 0.01416159
- 0.01252818 0.01272392 0.03272414 0.03284836]
+value: [0.02903557 0.03166127 0.02882695 0.03186989 0.03176379 0.03159523
+ 0.03179622 0.03586578 0.03634501 0.0317626 ]
 
-mean value: 0.020302248001098634
+mean value: 0.032052230834960935
 
 key: score_time 
-value: [0.01174521 0.00608397 0.01180792 0.01995564 0.01202583 0.01157284
- 0.01147699 0.01158595 0.02148509 0.02132988]
+value: [0.0200243  0.01208067 0.01757097 0.01424813 0.02132797 0.01160336
+ 0.02168918 0.02083015 0.02316499 0.0211401 ]
 
-mean value: 0.01390693187713623
+mean value: 0.018367981910705565
 
 key: test_mcc 
 value: [0.74535599        nan 0.46666667 0.6        0.74535599 0.6
@@ -4918,12 +4940,12 @@ Traceback (most recent call last):
 ValueError: Found unknown categories ['Other'] in column 5 during transform
 
   warnings.warn(
-/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:107: SettingWithCopyWarning: 
+/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:115: SettingWithCopyWarning: 
 A value is trying to be set on a copy of a slice from a DataFrame
 
 See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
   baseline_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
-/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:110: SettingWithCopyWarning: 
+/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:118: SettingWithCopyWarning: 
 A value is trying to be set on a copy of a slice from a DataFrame
 
 See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
@@ -4980,16 +5002,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', RidgeClassifierCV(cv=10))])
 
 key: fit_time 
-value: [0.18935609 0.18282723 0.19223666 0.17152286 0.21240568 0.25662017
- 0.20577478 0.19005656 0.19769907 0.12422442]
+value: [0.17595625 0.17720246 0.23368216 0.2604897  0.20024538 0.22788954
+ 0.17445135 0.18081164 0.20982456 0.23728633]
 
-mean value: 0.19227235317230223
+mean value: 0.20778393745422363
 
 key: score_time 
-value: [0.02291918 0.01264501 0.02030349 0.02279305 0.02387357 0.02354836
- 0.02319574 0.02317953 0.01894236 0.01210165]
+value: [0.0230782  0.01266408 0.01375008 0.02251482 0.01994252 0.0219214
+ 0.0211935  0.02238035 0.02076578 0.01844668]
 
-mean value: 0.020350193977355956
+mean value: 0.019665741920471193
 
 key: test_mcc 
 value: [0.74535599        nan 0.46666667 0.6        0.74535599 0.6
@@ -5205,16 +5227,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', LogisticRegression(random_state=42))])
 
 key: fit_time 
-value: [0.02668691 0.02563429 0.02581406 0.02586079 0.02586055 0.02636909
- 0.02358246 0.02464199 0.02246094 0.02333188]
+value: [0.02724361 0.02601719 0.02918768 0.02592826 0.02982807 0.02673316
+ 0.02467227 0.02559614 0.02310491 0.02411938]
 
-mean value: 0.02502429485321045
+mean value: 0.026243066787719725
 
 key: score_time 
-value: [0.01169181 0.00628114 0.00639367 0.0120008  0.01172805 0.0117023
- 0.01168251 0.01178622 0.01171184 0.01170659]
+value: [0.0119884  0.00643563 0.01028681 0.01191902 0.01183963 0.01192975
+ 0.01201987 0.0121851  0.01160169 0.0118041 ]
 
-mean value: 0.010668492317199707
+mean value: 0.011201000213623047
 
 key: test_mcc 
 value: [0.65465367        nan        nan 0.81649658 0.6        0.81649658
@@ -5980,16 +6002,16 @@ Pipeline(steps=[('prep',
                 ('model', LogisticRegressionCV(random_state=42))])
 
 key: fit_time 
-value: [0.67839217 0.48034787 0.64701414 0.51438808 0.60679531 0.65355921
- 0.49431872 0.5318253  0.58624101 0.55083251]
+value: [0.59747982 0.64376473 0.71673417 0.69791865 0.47388649 1.05191803
+ 0.63061428 0.58212113 0.71103501 0.60764217]
 
-mean value: 0.5743714332580566
+mean value: 0.6713114500045776
 
 key: score_time 
-value: [0.01309419 0.00647235 0.00659251 0.01193786 0.01207781 0.01458716
- 0.01210022 0.01188588 0.01342583 0.01195741]
+value: [0.01522326 0.0066607  0.00642395 0.01201344 0.01186037 0.01513076
+ 0.01198053 0.01196361 0.01532221 0.01208282]
 
-mean value: 0.011413121223449707
+mean value: 0.011866164207458497
 
 key: test_mcc 
 value: [0.6               nan        nan 0.81649658 0.40824829 0.81649658
@@ -6107,16 +6129,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', GaussianNB())])
 
 key: fit_time 
-value: [0.01233888 0.01138854 0.01001382 0.00979853 0.00967813 0.00976253
- 0.00926352 0.00987411 0.00966692 0.00973201]
+value: [0.0123117  0.00990081 0.00964355 0.0083921  0.00831342 0.00837278
+ 0.00830507 0.00861096 0.00842547 0.0087893 ]
 
-mean value: 0.01015169620513916
+mean value: 0.00910651683807373
 
 key: score_time 
-value: [0.01177788 0.00480366 0.00498605 0.00967026 0.00950933 0.00950623
- 0.00948453 0.00956655 0.00957298 0.0094316 ]
+value: [0.02768016 0.0048759  0.00427985 0.00843048 0.00844884 0.0084765
+ 0.00845861 0.00852132 0.00856638 0.00871158]
 
-mean value: 0.008830904960632324
+mean value: 0.0096449613571167
 
 key: test_mcc 
 value: [-0.33333333         nan         nan  0.2         0.5         0.21821789
@@ -6166,7 +6188,8 @@ value: [0.63235294 0.61111111 0.79245283 0.82352941 0.74576271 0.72131148
 mean value: 0.7575522057355462
 
 key: test_recall 
-value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+value: [0.8 nan nan 0.6 1.  0.8 0.6 1.  0.8 1. ]
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
 Traceback (most recent call last):
   File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
     return cache[method]
@@ -6262,7 +6285,6 @@ Traceback (most recent call last):
 ValueError: Found unknown categories ['XDR'] in column 5 during transform
 
   warnings.warn(
-[0.8 nan nan 0.6 1.  0.8 0.6 1.  0.8 1. ]
 
 mean value: nan
 
@@ -6330,16 +6352,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', BernoulliNB())])
 
 key: fit_time 
-value: [0.01022792 0.00995421 0.01036096 0.00926161 0.01014042 0.00990748
- 0.00995111 0.00963116 0.00946689 0.01010084]
+value: [0.00870204 0.00929213 0.00878859 0.00928044 0.00863266 0.00860143
+ 0.00866652 0.00958157 0.0096643  0.00961232]
 
-mean value: 0.009900259971618652
+mean value: 0.009082198143005371
 
 key: score_time 
-value: [0.00955129 0.00466061 0.00484204 0.00874567 0.00902104 0.00952482
- 0.00964499 0.00959897 0.00870395 0.00953412]
+value: [0.00865149 0.00441241 0.00431967 0.00865722 0.00870681 0.00857425
+ 0.00904512 0.00933361 0.00930762 0.00936174]
 
-mean value: 0.008382749557495118
+mean value: 0.008036994934082031
 
 key: test_mcc 
 value: [0.21821789        nan        nan 0.21821789 0.6        0.40824829
@@ -6553,16 +6575,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', KNeighborsClassifier())])
 
 key: fit_time 
-value: [0.00974655 0.00932145 0.01000047 0.00874233 0.00839186 0.00837994
- 0.00840974 0.00860238 0.00835204 0.00856972]
+value: [0.00962067 0.00819492 0.00826192 0.00823355 0.00825858 0.0082972
+ 0.00824308 0.0086062  0.0086453  0.00830889]
 
-mean value: 0.00885164737701416
+mean value: 0.00846703052520752
 
 key: score_time 
-value: [0.01500773 0.00461984 0.00566602 0.01129222 0.00940275 0.00949812
- 0.01562929 0.00947976 0.00929308 0.00932264]
+value: [0.0149827  0.00423265 0.00422812 0.00957608 0.00942659 0.0144031
+ 0.00948668 0.00979161 0.01269317 0.00941229]
 
-mean value: 0.00992114543914795
+mean value: 0.009823298454284668
 
 key: test_mcc 
 value: [0.2               nan        nan 0.5        0.5        0.
@@ -6872,16 +6894,16 @@ Pipeline(steps=[('prep',
                 ('model', SVC(random_state=42))])
 
 key: fit_time 
-value: [0.01139522 0.00962234 0.0094831  0.00941205 0.0092721  0.00994563
- 0.00933862 0.00987744 0.00951862 0.00935507]
+value: [0.00986791 0.01017141 0.00918722 0.00919867 0.0091598  0.00916481
+ 0.00927687 0.00930643 0.00905871 0.00913   ]
 
-mean value: 0.009722018241882324
+mean value: 0.00935218334197998
 
 key: score_time 
-value: [0.00916266 0.00445056 0.00433707 0.00926876 0.00877261 0.00935793
- 0.00875735 0.00887942 0.00921822 0.00887632]
+value: [0.00940585 0.0044322  0.00426841 0.00933194 0.0090754  0.00920653
+ 0.0087769  0.0087471  0.0087831  0.00867248]
 
-mean value: 0.008108091354370118
+mean value: 0.008069992065429688
 
 key: test_mcc 
 value: [0.2               nan        nan 0.81649658 0.65465367 0.81649658
@@ -6999,16 +7021,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', MLPClassifier(max_iter=500, random_state=42))])
 
 key: fit_time 
-value: [0.37492514 0.37046218 0.38234639 0.44610929 0.35609388 0.35016608
- 0.38355708 0.37324095 0.34541011 0.59514403]
+value: [0.37450528 0.35082269 0.39461231 0.50876474 0.3729012  0.39715028
+ 0.39133334 0.37829828 0.58005452 0.37452221]
 
-mean value: 0.39774551391601565
+mean value: 0.4122964859008789
 
 key: score_time 
-value: [0.01198101 0.00661445 0.00665283 0.01227736 0.01198196 0.0120244
- 0.01200557 0.01206875 0.01202154 0.0120492 ]
+value: [0.01205969 0.00659204 0.00670195 0.01243973 0.01200485 0.01204276
+ 0.01203871 0.01203322 0.012043   0.01204824]
 
-mean value: 0.010967707633972168
+mean value: 0.011000418663024902
 
 key: test_mcc 
 value: [0.2               nan        nan 0.5        0.6        0.81649658
@@ -7215,16 +7237,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', DecisionTreeClassifier(random_state=42))])
 
 key: fit_time 
-value: [0.01511621 0.01407719 0.01127934 0.01145411 0.01078749 0.01093793
- 0.01074314 0.01044631 0.01040411 0.01171279]
+value: [0.0144136  0.01382351 0.01106048 0.01080775 0.0104351  0.01044345
+ 0.01019263 0.00999427 0.0097239  0.01033235]
 
-mean value: 0.01169586181640625
+mean value: 0.011122703552246094
 
 key: score_time 
-value: [0.01174808 0.00465941 0.00485921 0.00918746 0.00877357 0.00866652
- 0.00861716 0.0088768  0.00946498 0.00948787]
+value: [0.01140714 0.00496936 0.00459981 0.00874352 0.00843453 0.00867224
+ 0.00845146 0.0083878  0.00837374 0.00837708]
 
-mean value: 0.008434104919433593
+mean value: 0.008041667938232421
 
 key: test_mcc 
 value: [1.                nan        nan 0.6        0.81649658 0.6
@@ -7527,16 +7549,16 @@ Pipeline(steps=[('prep',
                 ('model', ExtraTreesClassifier(random_state=42))])
 
 key: fit_time 
-value: [0.08262682 0.08500862 0.0852046  0.08543134 0.08545947 0.08390474
- 0.08447599 0.08485103 0.08712196 0.08414125]
+value: [0.08056545 0.08084106 0.08170795 0.08255649 0.08109879 0.08101583
+ 0.08092213 0.08285975 0.08272982 0.08114171]
 
-mean value: 0.08482258319854737
+mean value: 0.0815438985824585
 
 key: score_time 
-value: [0.01772976 0.0046699  0.00469708 0.01746058 0.01755953 0.01747441
- 0.01821804 0.01745749 0.01838636 0.01701069]
+value: [0.01655602 0.00445127 0.0046792  0.01720977 0.01671934 0.01684332
+ 0.01761675 0.01713228 0.0171361  0.01674366]
 
-mean value: 0.015066385269165039
+mean value: 0.014508771896362304
 
 key: test_mcc 
 value: [0.81649658        nan        nan 0.6        0.81649658 0.65465367
@@ -7647,16 +7669,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', ExtraTreeClassifier(random_state=42))])
 
 key: fit_time 
-value: [0.00895452 0.00875449 0.00881481 0.00910234 0.00885272 0.00899577
- 0.00944829 0.00882363 0.00928545 0.00872445]
+value: [0.00848937 0.00846291 0.00848842 0.00857568 0.00871515 0.00847149
+ 0.0084784  0.00839734 0.00870109 0.00886941]
 
-mean value: 0.008975648880004882
+mean value: 0.008564925193786621
 
 key: score_time 
-value: [0.00846648 0.00439858 0.00429606 0.00868988 0.00855374 0.00891685
- 0.0090065  0.00850105 0.00932431 0.00891042]
+value: [0.00837517 0.00423932 0.00420213 0.00869727 0.00871015 0.00843763
+ 0.00841641 0.00845981 0.00853753 0.00839472]
 
-mean value: 0.007906389236450196
+mean value: 0.007647013664245606
 
 key: test_mcc 
 value: [0.40824829        nan        nan 0.40824829 0.65465367 0.65465367
@@ -7864,16 +7886,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                  RandomForestClassifier(n_estimators=1000, random_state=42))])
 
 key: fit_time 
-value: [1.05830765 1.05878472 1.03069663 1.03076839 1.05383444 1.04063582
- 1.02942109 1.03039336 1.04405308 1.04590487]
+value: [1.02116418 1.0257473  1.08174706 1.0322454  1.0248096  1.0253284
+ 1.02173543 1.02932763 1.02692318 1.02705503]
 
-mean value: 1.0422800064086915
+mean value: 1.0316083192825318
 
 key: score_time 
-value: [0.09411407 0.00471926 0.00459909 0.09293795 0.09200311 0.08624506
- 0.08817077 0.08822632 0.08993959 0.09330368]
+value: [0.09387398 0.00442934 0.0045464  0.09425235 0.0929327  0.09006643
+ 0.09262896 0.09255028 0.08933902 0.09436941]
 
-mean value: 0.07342588901519775
+mean value: 0.07489888668060303
 
 key: test_mcc 
 value: [0.81649658        nan        nan 0.65465367 0.81649658 0.81649658
@@ -8203,16 +8225,16 @@ Pipeline(steps=[('prep',
                                         oob_score=True, random_state=42))])
 
 key: fit_time 
-value: [0.85999107 0.87137699 0.85217786 0.87492132 0.86495948 0.84439301
- 0.88340187 0.85964608 0.85427666 0.96079803]
+value: [0.80205727 0.85240149 0.84424591 0.98081374 0.8455205  0.89721966
+ 0.84490728 0.85584474 0.9097147  0.88582087]
 
-mean value: 0.8725942373275757
+mean value: 0.8718546152114868
 
 key: score_time 
-value: [0.2267487  0.00457191 0.00463676 0.16841388 0.15269232 0.22285342
- 0.1705575  0.19267631 0.23203325 0.16924214]
+value: [0.20501709 0.00500822 0.0045805  0.19695258 0.12441659 0.21853828
+ 0.22464013 0.22292161 0.20894432 0.14386797]
 
-mean value: 0.1544426202774048
+mean value: 0.1554887294769287
 
 key: test_mcc 
 value: [0.81649658        nan        nan 0.65465367 1.         0.81649658
@@ -8330,16 +8352,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', BernoulliNB())])
 
 key: fit_time 
-value: [0.02133918 0.00901937 0.00918055 0.00923777 0.00924492 0.00955749
- 0.00870299 0.00861335 0.00901318 0.00863385]
+value: [0.02338266 0.0094502  0.00960112 0.01006866 0.00941682 0.00956082
+ 0.00956559 0.00960374 0.0094769  0.00946879]
 
-mean value: 0.010254263877868652
+mean value: 0.010959529876708984
 
 key: score_time 
-value: [0.01596808 0.00419235 0.00490522 0.00853872 0.0092361  0.00877595
- 0.00848365 0.00854468 0.00853395 0.00849319]
+value: [0.0109334  0.00468445 0.00506949 0.00934649 0.00933862 0.00940108
+ 0.00930977 0.00933194 0.00933313 0.00930548]
 
-mean value: 0.008567190170288086
+mean value: 0.008605384826660156
 
 key: test_mcc 
 value: [0.21821789        nan        nan 0.21821789 0.6        0.40824829
@@ -8566,16 +8588,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                                validate_parameters=None, verbosity=0))])
 
 key: fit_time 
-value: [0.0385325  0.07943583 0.07845998 0.04148769 0.03596449 0.03703904
- 0.07786202 0.07349586 0.03462934 0.08684826]
+value: [0.03791428 0.039891   0.04524422 0.05228448 0.03879333 0.04074168
+ 0.03861666 0.04192305 0.03786802 0.04126   ]
 
-mean value: 0.05837550163269043
+mean value: 0.0414536714553833
 
 key: score_time 
-value: [0.01043248 0.00514793 0.00486374 0.01063561 0.01104069 0.01106238
- 0.01134038 0.0113461  0.01019478 0.01071811]
+value: [0.01134562 0.0051043  0.00507307 0.01123905 0.01100111 0.0111165
+ 0.01112223 0.01104426 0.0112226  0.01109934]
 
-mean value: 0.009678220748901368
+mean value: 0.009936809539794922
 
 key: test_mcc 
 value: [1.                nan        nan 1.         1.         0.6
@@ -8877,16 +8899,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', LinearDiscriminantAnalysis())])
 
 key: fit_time 
-value: [0.03867555 0.02319288 0.02706194 0.02529454 0.05284071 0.11862731
- 0.03852725 0.04383326 0.03280854 0.04009724]
+value: [0.02424765 0.03926849 0.02169585 0.02134705 0.03105235 0.01888251
+ 0.01905227 0.04464221 0.01857352 0.03482223]
 
-mean value: 0.04409592151641846
+mean value: 0.027358412742614746
 
 key: score_time 
-value: [0.0272522  0.00630307 0.00678635 0.01231098 0.02257872 0.02594495
- 0.01236582 0.01228881 0.02403021 0.02088833]
+value: [0.02080226 0.00596833 0.00588465 0.01170087 0.01165652 0.01164246
+ 0.01558542 0.01165223 0.01164913 0.02148533]
 
-mean value: 0.017074942588806152
+mean value: 0.012802720069885254
 
 key: test_mcc 
 value: [0.65465367        nan        nan 0.65465367 0.81649658 0.2
@@ -9093,16 +9115,16 @@ Traceback (most recent call last):
 ValueError: Found unknown categories ['XDR'] in column 5 during transform
 
   warnings.warn(
-[0.02235413 0.01077485 0.01104784 0.00940537 0.00992942 0.00974631
- 0.01046252 0.01258326 0.00965023 0.00989842]
+[0.01784444 0.00874496 0.00863242 0.00858641 0.00845957 0.00878644
+ 0.00931096 0.00858283 0.00858426 0.0092175 ]
 
-mean value: 0.011585235595703125
+mean value: 0.009674978256225587
 
 key: score_time 
-value: [0.010818   0.0060277  0.00485873 0.01032233 0.00985336 0.0115025
- 0.00923038 0.00966311 0.01026726 0.01118207]
+value: [0.00886011 0.00435925 0.0042181  0.00851774 0.00852823 0.00907302
+ 0.00854731 0.00856113 0.00867009 0.00898337]
 
-mean value: 0.009372544288635255
+mean value: 0.007831835746765136
 
 key: test_mcc 
 value: [0.                nan        nan 0.40824829 0.2        0.81649658
@@ -9221,16 +9243,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                  PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
 
 key: fit_time 
-value: [0.01186442 0.01330304 0.01402235 0.01382852 0.01425862 0.01367593
- 0.01314926 0.01479983 0.01317763 0.01350975]
+value: [0.00957155 0.01297784 0.01338482 0.01337719 0.01387429 0.01330495
+ 0.01264739 0.01446295 0.01269341 0.01309991]
 
-mean value: 0.01355893611907959
+mean value: 0.01293942928314209
 
 key: score_time 
-value: [0.0099504  0.00586057 0.00639057 0.01186395 0.01181006 0.01167011
- 0.01159191 0.01164412 0.01165104 0.01212764]
+value: [0.0086     0.00586104 0.00588679 0.01135945 0.01134872 0.01132274
+ 0.01132727 0.01135826 0.01136208 0.01132488]
 
-mean value: 0.010456037521362305
+mean value: 0.009975123405456542
 
 key: test_mcc 
 value: [0.40824829        nan        nan 0.81649658 0.6        0.81649658
@@ -9444,16 +9466,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', SGDClassifier(n_jobs=10, random_state=42))])
 
 key: fit_time 
-value: [0.01379657 0.01267099 0.01356316 0.01448107 0.01277828 0.01268101
- 0.01251578 0.01251745 0.01239395 0.01302099]
+value: [0.01255083 0.01222014 0.01237512 0.0122931  0.01268792 0.01234984
+ 0.01206565 0.01226902 0.01228833 0.01291275]
 
-mean value: 0.013041925430297852
+mean value: 0.012401270866394042
 
 key: score_time 
-value: [0.01063323 0.00612164 0.00623727 0.01173258 0.01149225 0.01172876
- 0.01138878 0.0114789  0.0114975  0.01158094]
+value: [0.01079416 0.00594401 0.00597167 0.01558065 0.01139426 0.01133919
+ 0.01131606 0.01139021 0.01131845 0.01140761]
 
-mean value: 0.010389184951782227
+mean value: 0.010645627975463867
 
 key: test_mcc 
 value: [0.                nan        nan 0.40824829 0.6        0.81649658
@@ -9763,16 +9785,16 @@ Pipeline(steps=[('prep',
                 ('model', AdaBoostClassifier(random_state=42))])
 
 key: fit_time 
-value: [0.12627959 0.11029863 0.10071945 0.09155011 0.08990955 0.09815741
- 0.09487963 0.10431266 0.08676529 0.08815813]
+value: [0.09700513 0.08642244 0.08795667 0.08845925 0.08627391 0.08648276
+ 0.08725667 0.08758473 0.08776522 0.08776069]
 
-mean value: 0.09910304546356201
+mean value: 0.08829674720764161
 
 key: score_time 
-value: [0.0188818  0.0063622  0.00565004 0.01617861 0.01684022 0.01686287
- 0.01757121 0.01591206 0.0148592  0.01506853]
+value: [0.01467133 0.00474548 0.00496674 0.01501131 0.01460814 0.01463032
+ 0.01517797 0.01503372 0.01542163 0.01494527]
 
-mean value: 0.014418673515319825
+mean value: 0.01292119026184082
 
 key: test_mcc 
 value: [0.65465367        nan        nan 1.         1.         0.6
@@ -9885,16 +9907,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                                    random_state=42))])
 
 key: fit_time 
-value: [0.03483057 0.03842282 0.03175402 0.02523565 0.04487014 0.03826332
- 0.03274846 0.04900002 0.04477096 0.03169918]
+value: [0.03509688 0.04568958 0.0302496  0.03195786 0.02884889 0.03385544
+ 0.05140829 0.04297328 0.03036499 0.0328958 ]
 
-mean value: 0.037159514427185056
+mean value: 0.03633406162261963
 
 key: score_time 
-value: [0.02208352 0.00828576 0.00562572 0.02282691 0.02469707 0.02239871
- 0.03312135 0.02478552 0.02329421 0.02396297]
+value: [0.02096653 0.00952625 0.00549054 0.02393532 0.02444291 0.02201509
+ 0.03557968 0.02399278 0.02232742 0.02703047]
 
-mean value: 0.021108174324035646
+mean value: 0.021530699729919434
 
 key: test_mcc 
 value: [1.                nan        nan 1.         1.         0.6
@@ -10106,16 +10128,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', GaussianProcessClassifier(random_state=42))])
 
 key: fit_time 
-value: [0.01488805 0.01686454 0.02178621 0.01625896 0.0167253  0.01618171
- 0.01634598 0.02672648 0.02222109 0.02587366]
+value: [0.01417232 0.01547766 0.02389431 0.02053976 0.0161953  0.01604009
+ 0.02661467 0.01625609 0.01613426 0.01619077]
 
-mean value: 0.019387197494506837
+mean value: 0.018151521682739258
 
 key: score_time 
-value: [0.0114007  0.00593019 0.00617599 0.01183033 0.01183867 0.01179862
- 0.01180983 0.01191282 0.01200986 0.01189065]
+value: [0.01128316 0.00590849 0.00611544 0.01191759 0.01187563 0.01181817
+ 0.01201296 0.01186419 0.0119288  0.01185203]
 
-mean value: 0.01065976619720459
+mean value: 0.010657644271850586
 
 key: test_mcc 
 value: [0.81649658        nan        nan 0.40824829 0.40824829 0.65465367
@@ -10440,16 +10462,16 @@ Pipeline(steps=[('prep',
                 ('model', GradientBoostingClassifier(random_state=42))])
 
 key: fit_time 
-value: [0.23585057 0.21870351 0.2274735  0.20173454 0.19497991 0.21246147
- 0.19365239 0.22760653 0.19109488 0.20255446]
+value: [0.22346067 0.21064496 0.22775483 0.20194101 0.20313954 0.22486734
+ 0.20193768 0.22959948 0.19129062 0.20147419]
 
-mean value: 0.2106111764907837
+mean value: 0.21161103248596191
 
 key: score_time 
-value: [0.01025057 0.00483108 0.00519347 0.00991392 0.00999832 0.00909162
- 0.01007318 0.00993586 0.00930429 0.01151347]
+value: [0.009547   0.00503063 0.00500417 0.00985026 0.01011467 0.01005173
+ 0.00999117 0.01001072 0.01000142 0.00996733]
 
-mean value: 0.009010577201843261
+mean value: 0.0089569091796875
 
 key: test_mcc 
 value: [1.                nan        nan 0.40824829 0.81649658 0.6
@@ -10560,16 +10582,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', QuadraticDiscriminantAnalysis())])
 
 key: fit_time 
-value: [0.01237321 0.014323   0.01440096 0.01429844 0.0142436  0.01766992
- 0.01455665 0.01430321 0.01442313 0.01517463]
+value: [0.01206875 0.01536727 0.01444435 0.0142355  0.01431799 0.01439047
+ 0.01433372 0.01445889 0.01462865 0.01538968]
 
-mean value: 0.01457667350769043
+mean value: 0.014363527297973633
 
 key: score_time 
-value: [0.01152301 0.00598216 0.00594306 0.01162028 0.01163912 0.01281118
- 0.01165009 0.01300716 0.01285219 0.0118072 ]
+value: [0.01155353 0.00604582 0.0060041  0.01183033 0.01183915 0.01176929
+ 0.0118506  0.01583171 0.01520729 0.01568818]
 
-mean value: 0.010883545875549317
+mean value: 0.011761999130249024
 
 key: test_mcc 
 value: [0.65465367        nan        nan 0.21821789 0.81649658 0.40824829
@@ -10776,16 +10798,16 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', RidgeClassifier(random_state=42))])
 
 key: fit_time 
-value: [0.03715086 0.01295328 0.01913691 0.02935195 0.03285313 0.05956721
- 0.03427505 0.03019619 0.02694535 0.03023982]
+value: [0.02940583 0.03634906 0.03245473 0.03262615 0.03238082 0.02871442
+ 0.01274729 0.01276231 0.02434325 0.03222251]
 
-mean value: 0.031266975402832034
+mean value: 0.027400636672973634
 
 key: score_time 
-value: [0.01183128 0.00623417 0.00630283 0.02030396 0.02208257 0.03155112
- 0.02232051 0.01882124 0.02124667 0.02309394]
+value: [0.0238688  0.01215315 0.0120945  0.01822114 0.02281642 0.01169848
+ 0.01157546 0.01160693 0.0200026  0.02159095]
 
-mean value: 0.018378829956054686
+mean value: 0.016562843322753908
 
 key: test_mcc 
 value: [0.81649658        nan        nan 0.65465367 0.81649658 0.81649658
@@ -10981,12 +11003,12 @@ Traceback (most recent call last):
 ValueError: Found unknown categories ['XDR'] in column 5 during transform
 
   warnings.warn(
-/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:128: SettingWithCopyWarning: 
+/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:136: SettingWithCopyWarning: 
 A value is trying to be set on a copy of a slice from a DataFrame
 
 See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
   smnc_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
-/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:131: SettingWithCopyWarning: 
+/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:139: SettingWithCopyWarning: 
 A value is trying to be set on a copy of a slice from a DataFrame
 
 See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
@@ -11056,16 +11078,16 @@ Pipeline(steps=[('prep',
                 ('model', RidgeClassifierCV(cv=10))])
 
 key: fit_time 
-value: [0.10745692 0.17037439 0.22704196 0.24130416 0.19570732 0.18806911
- 0.18574762 0.18309569 0.18836188 0.18505669]
+value: [0.10528541 0.17085743 0.17106438 0.21791244 0.18295407 0.1117785
+ 0.12498665 0.18348122 0.09826803 0.16421723]
 
-mean value: 0.18722157478332518
+mean value: 0.15308053493499757
 
 key: score_time 
-value: [0.01179934 0.01459885 0.01264095 0.02238703 0.02003407 0.02027035
- 0.01176071 0.02208042 0.02279568 0.02302146]
+value: [0.01189971 0.01263165 0.01266694 0.02156377 0.01987004 0.01182532
+ 0.0199995  0.02175546 0.01192117 0.02374363]
 
-mean value: 0.018138885498046875
+mean value: 0.0167877197265625
 
 key: test_mcc 
 value: [0.81649658        nan        nan 0.65465367 0.81649658 0.81649658
@@ -11182,71 +11204,86 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', LogisticRegression(random_state=42))])
 
 key: fit_time 
-value: [0.03232861 0.02856731 0.02625203 0.02819514 0.0350008  0.03808117
- 0.03861642 0.03147697 0.03068471 0.02853966]
+value: [0.0232265  0.02718496 0.02470827 0.04355407 0.02657723 0.02697539
+ 0.02654147 0.02200747 0.03470922 0.02654457]
 
-mean value: 0.031774282455444336
+mean value: 0.028202915191650392
 
 key: score_time 
-value: [0.01172328 0.01187897 0.0063858  0.01171517 0.0116365  0.01235962
- 0.0118258  0.01185846 0.01206326 0.01185799]
+value: [0.01154375 0.01186514 0.00623894 0.01168394 0.01151109 0.01170611
+ 0.01173449 0.01145792 0.01147342 0.01157451]
 
-mean value: 0.011330485343933105
+mean value: 0.011078929901123047
 
 key: test_mcc 
-value: [0.2        0.81649658        nan 0.40824829 0.6        0.65465367
+value: [0.40824829 0.65465367        nan 0.40824829 0.81649658 0.81649658
  0.40824829 0.65465367 0.21821789 0.81649658]
 
 mean value: nan
 
 key: train_mcc 
-value: [0.95555556 0.95650071 0.88910845 0.93356387 0.93356387 0.95555556
- 0.88910845 0.91201231 0.93356387 0.91201231]
+value: [0.88910845 0.91111111 0.88910845 0.91111111 0.93356387 0.88910845
+ 0.93356387 0.88910845 0.95555556 0.88910845]
 
-mean value: 0.9270544956323105
+mean value: 0.9090447765314074
 
 key: test_accuracy 
-value: [0.6 0.9 nan 0.7 0.8 0.8 0.7 0.8 0.6 0.9]
+value: [0.7 0.8 nan 0.7 0.9 0.9 0.7 0.8 0.6 0.9]
 
 mean value: nan
 
 key: train_accuracy 
-value: [0.97777778 0.97777778 0.94444444 0.96666667 0.96666667 0.97777778
- 0.94444444 0.95555556 0.96666667 0.95555556]
+value: [0.94444444 0.95555556 0.94444444 0.95555556 0.96666667 0.94444444
+ 0.96666667 0.94444444 0.97777778 0.94444444]
 
-mean value: 0.9633333333333334
+mean value: 0.9544444444444444
 
 key: test_fscore 
-value: [0.6        0.90909091        nan 0.66666667 0.8        0.75
+value: [0.72727273 0.83333333        nan 0.72727273 0.90909091 0.88888889
  0.66666667 0.83333333 0.66666667 0.90909091]
 
 mean value: nan
 
 key: train_fscore 
-value: [0.97777778 0.97826087 0.94505495 0.96629213 0.96703297 0.97777778
- 0.94382022 0.95652174 0.96703297 0.95652174]
+value: [0.94505495 0.95555556 0.94382022 0.95555556 0.96703297 0.94382022
+ 0.96703297 0.94382022 0.97777778 0.94505495]
 
-mean value: 0.9636093142053084
+mean value: 0.9544525387222017
 
 key: test_precision 
-value: [0.6        0.83333333        nan 0.75       0.8        1.
+value: [0.66666667 0.71428571        nan 0.66666667 0.83333333 1.
  0.75       0.71428571 0.57142857 0.83333333]
 
 mean value: nan
 
 key: train_precision 
-value: [0.97777778 0.95744681 0.93478261 0.97727273 0.95652174 0.97777778
- 0.95454545 0.93617021 0.95652174 0.93617021]
+value: [0.93478261 0.95555556 0.95454545 0.95555556 0.95652174 0.95454545
+ 0.95652174 0.95454545 0.97777778 0.93478261]
 
-mean value: 0.9564987058372812
+mean value: 0.9535133948177427
 
 key: test_recall 
-value: [0.6 1.  nan 0.6 0.8 0.6 0.6 1.  0.8 1. ]
+value: [0.8 1.  nan 0.8 1.  0.8 0.6 1.  0.8 1. ]
 
 mean value: nan
 
-key: train_recall 
-value:/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+key: train_recall /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
 STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
 
 Increase the number of iterations (max_iter) or scale the data as shown in:
@@ -11790,37 +11827,38 @@ Increase the number of iterations (max_iter) or scale the data as shown in:
 Please also refer to the documentation for alternative solver options:
     https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
   n_iter_i = _check_optimize_result(
- [0.97777778 1.         0.95555556 0.95555556 0.97777778 0.97777778
- 0.93333333 0.97777778 0.97777778 0.97777778]
 
-mean value: 0.9711111111111111
+value: [0.95555556 0.95555556 0.93333333 0.95555556 0.97777778 0.93333333
+ 0.97777778 0.93333333 0.97777778 0.95555556]
+
+mean value: 0.9555555555555556
 
 key: test_roc_auc 
-value: [0.6 0.9 nan 0.7 0.8 0.8 0.7 0.8 0.6 0.9]
+value: [0.7 0.8 nan 0.7 0.9 0.9 0.7 0.8 0.6 0.9]
 
 mean value: nan
 
 key: train_roc_auc 
-value: [0.97777778 0.97777778 0.94444444 0.96666667 0.96666667 0.97777778
- 0.94444444 0.95555556 0.96666667 0.95555556]
+value: [0.94444444 0.95555556 0.94444444 0.95555556 0.96666667 0.94444444
+ 0.96666667 0.94444444 0.97777778 0.94444444]
 
-mean value: 0.9633333333333334
+mean value: 0.9544444444444445
 
 key: test_jcc 
-value: [0.42857143 0.83333333        nan 0.5        0.66666667 0.6
+value: [0.57142857 0.71428571        nan 0.57142857 0.83333333 0.8
  0.5        0.71428571 0.5        0.83333333]
 
 mean value: nan
 
 key: train_jcc 
-value: [0.95652174 0.95744681 0.89583333 0.93478261 0.93617021 0.95652174
- 0.89361702 0.91666667 0.93617021 0.91666667]
+value: [0.89583333 0.91489362 0.89361702 0.91489362 0.93617021 0.89361702
+ 0.93617021 0.89361702 0.95652174 0.89583333]
 
-mean value: 0.9300397008942337
+mean value: 0.9131167129201356
 
-MCC on Blind test: 0.37
+MCC on Blind test: 0.42
 
-Accuracy on Blind test: 0.72
+Accuracy on Blind test: 0.75
 
 Model_name: Logistic RegressionCV 
 Model func: LogisticRegressionCV(random_state=42) 
@@ -11853,101 +11891,100 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', LogisticRegressionCV(random_state=42))])
 
 key: fit_time 
-value: [0.55768061 0.55615211 0.58541512 0.71465158 0.54918408 0.57905293
- 0.62343574 0.64894342 0.58266759 0.57590222]
+value: [0.67746806 0.59781671 0.69200182 0.71163416 0.61749101 0.63935566
+ 0.77134275 0.60597873 0.80454826 0.74892688]
 
-mean value: 0.5973085403442383
+mean value: 0.686656403541565
 
 key: score_time 
-value: [0.0122087  0.01510215 0.00644422 0.01736808 0.01485038 0.01541257
- 0.01190186 0.01582146 0.01725316 0.01302958]
+value: [0.01449347 0.01300931 0.00646234 0.0151124  0.01494288 0.01490331
+ 0.01547623 0.0119102  0.01237154 0.01246715]
 
-mean value: 0.013939213752746583
+mean value: 0.01311488151550293
 
 key: test_mcc 
-value: [0.2        0.81649658        nan 0.65465367 0.40824829 0.81649658
- 0.81649658 0.65465367 0.5        0.81649658]
+value: [0.65465367 0.81649658        nan 0.2        0.65465367 1.
+ 0.65465367 0.65465367 0.21821789 0.81649658]
 
 mean value: nan
 
 key: train_mcc 
-value: [0.95555556 1.         0.95555556 1.         1.         1.
- 0.93356387 1.         1.         0.97801929]
+value: [1.         1.         1.         1.         1.         1.
+ 1.         0.95650071 1.         1.        ]
 
-mean value: 0.9822694276350975
+mean value: 0.9956500714595278
 
 key: test_accuracy 
-value: [0.6 0.9 nan 0.8 0.7 0.9 0.9 0.8 0.7 0.9]
+value: [0.8 0.9 nan 0.6 0.8 1.  0.8 0.8 0.6 0.9]
 
 mean value: nan
 
 key: train_accuracy 
-value: [0.97777778 1.         0.97777778 1.         1.         1.
- 0.96666667 1.         1.         0.98888889]
+value: [1.         1.         1.         1.         1.         1.
+ 1.         0.97777778 1.         1.        ]
 
-mean value: 0.991111111111111
+mean value: 0.9977777777777778
 
 key: test_fscore 
-value: [0.6        0.90909091        nan 0.83333333 0.72727273 0.88888889
- 0.88888889 0.83333333 0.76923077 0.90909091]
+value: [0.83333333 0.90909091        nan 0.6        0.83333333 1.
+ 0.75       0.83333333 0.66666667 0.90909091]
 
 mean value: nan
 
 key: train_fscore 
-value: [0.97777778 1.         0.97777778 1.         1.         1.
- 0.96629213 1.         1.         0.98901099]
+value: [1.         1.         1.         1.         1.         1.
+ 1.         0.97826087 1.         1.        ]
 
-mean value: 0.9910858679398006
+mean value: 0.9978260869565218
 
 key: test_precision 
-value: [0.6        0.83333333        nan 0.71428571 0.66666667 1.
- 1.         0.71428571 0.625      0.83333333]
+value: [0.71428571 0.83333333        nan 0.6        0.71428571 1.
+ 1.         0.71428571 0.57142857 0.83333333]
 
 mean value: nan
 
 key: train_precision 
-value: [0.97777778 1.         0.97777778 1.         1.         1.
- 0.97727273 1.         1.         0.97826087]
+value: [1.         1.         1.         1.         1.         1.
+ 1.         0.95744681 1.         1.        ]
 
-mean value: 0.99110891523935
+mean value: 0.9957446808510638
 
 key: test_recall 
-value: [0.6 1.  nan 1.  0.8 0.8 0.8 1.  1.  1. ]
+value: [1.  1.  nan 0.6 1.  1.  0.6 1.  0.8 1. ]
 
 mean value: nan
 
 key: train_recall 
-value: [0.97777778 1.         0.97777778 1.         1.         1.
- 0.95555556 1.         1.         1.        ]
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 
-mean value: 0.991111111111111
+mean value: 1.0
 
 key: test_roc_auc 
-value: [0.6 0.9 nan 0.8 0.7 0.9 0.9 0.8 0.7 0.9]
+value: [0.8 0.9 nan 0.6 0.8 1.  0.8 0.8 0.6 0.9]
 
 mean value: nan
 
 key: train_roc_auc 
-value: [0.97777778 1.         0.97777778 1.         1.         1.
- 0.96666667 1.         1.         0.98888889]
+value: [1.         1.         1.         1.         1.         1.
+ 1.         0.97777778 1.         1.        ]
 
-mean value: 0.991111111111111
+mean value: 0.9977777777777778
 
 key: test_jcc 
-value: [0.42857143 0.83333333        nan 0.71428571 0.57142857 0.8
- 0.8        0.71428571 0.625      0.83333333]
+value: [0.71428571 0.83333333        nan 0.42857143 0.71428571 1.
+ 0.6        0.71428571 0.5        0.83333333]
 
 mean value: nan
 
 key: train_jcc 
-value: [0.95652174 1.         0.95652174 1.         1.         1.
- 0.93478261 1.         1.         0.97826087]
+value: [1.         1.         1.         1.         1.         1.
+ 1.         0.95744681 1.         1.        ]
 
-mean value: 0.9826086956521739
+mean value: 0.9957446808510638
 
-MCC on Blind test: 0.42
+MCC on Blind test: 0.48
 
-Accuracy on Blind test: 0.75
+Accuracy on Blind test: 0.78
 
 Model_name: Gaussian NB 
 Model func: GaussianNB() 
@@ -12028,101 +12065,101 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', GaussianNB())])
 
 key: fit_time 
-value: [0.01230741 0.01123023 0.00950027 0.00946021 0.00964046 0.00955129
- 0.00977182 0.00977874 0.00968146 0.00962782]
+value: [0.01217723 0.00952673 0.00885081 0.00973725 0.00852966 0.00841856
+ 0.00850034 0.0093751  0.0097971  0.00986147]
 
-mean value: 0.010054969787597656
+mean value: 0.00947742462158203
 
 key: score_time 
-value: [0.01180935 0.00984812 0.00469971 0.00937915 0.00949502 0.00958204
- 0.00940156 0.00952315 0.00942564 0.00944281]
+value: [0.01179314 0.00913548 0.00443888 0.00915837 0.00861049 0.00860596
+ 0.00853419 0.00947094 0.00863123 0.0086143 ]
 
-mean value: 0.00926065444946289
+mean value: 0.008699297904968262
 
 key: test_mcc 
-value: [-0.21821789  0.33333333         nan  0.2         0.5        -0.21821789
-  0.40824829  0.          0.21821789  0.5       ]
+value: [0.         0.40824829        nan 0.2        0.5        0.5
+ 0.         0.40824829 0.21821789 0.21821789]
 
 mean value: nan
 
 key: train_mcc 
-value: [0.53495589 0.66097134 0.71269665 0.84970583 0.54684459 0.60350985
- 0.8230355  0.53031442 0.76486616 0.58456547]
+value: [0.43305953 0.66097134 0.68957028 0.82548988 0.60971232 0.56980288
+ 0.73405869 0.6350529  0.73624773 0.56454844]
 
-mean value: 0.6611465698795331
+mean value: 0.6458513998944028
 
 key: test_accuracy 
-value: [0.4 0.6 nan 0.6 0.7 0.4 0.7 0.5 0.6 0.7]
+value: [0.5 0.7 nan 0.6 0.7 0.7 0.5 0.7 0.6 0.6]
 
 mean value: nan
 
 key: train_accuracy 
-value: [0.73333333 0.82222222 0.85555556 0.92222222 0.75555556 0.78888889
- 0.91111111 0.75555556 0.87777778 0.77777778]
+value: [0.68888889 0.82222222 0.84444444 0.91111111 0.8        0.77777778
+ 0.86666667 0.81111111 0.86666667 0.77777778]
 
-mean value: 0.82
+mean value: 0.8166666666666667
 
 key: test_fscore 
-value: [0.5        0.71428571        nan 0.6        0.76923077 0.5
- 0.72727273 0.61538462 0.66666667 0.76923077]
+value: [0.61538462 0.72727273        nan 0.6        0.76923077 0.76923077
+ 0.44444444 0.72727273 0.66666667 0.66666667]
 
 mean value: nan
 
 key: train_fscore 
-value: [0.78571429 0.84       0.85057471 0.92631579 0.79245283 0.81553398
- 0.90909091 0.78431373 0.86746988 0.80769231]
+value: [0.75       0.84       0.84782609 0.91489362 0.81632653 0.8
+ 0.86956522 0.82828283 0.86046512 0.79591837]
 
-mean value: 0.8379158420394337
+mean value: 0.8323277763890184
 
 key: test_precision 
-value: [0.42857143 0.55555556        nan 0.6        0.625      0.42857143
- 0.66666667 0.5        0.57142857 0.625     ]
+value: [0.5        0.66666667        nan 0.6        0.625      0.625
+ 0.5        0.66666667 0.57142857 0.57142857]
 
 mean value: nan
 
 key: train_precision 
-value: [0.65671642 0.76363636 0.88095238 0.88       0.68852459 0.72413793
- 0.93023256 0.70175439 0.94736842 0.71186441]
+value: [0.62686567 0.76363636 0.82978723 0.87755102 0.75471698 0.72727273
+ 0.85106383 0.75925926 0.90243902 0.73584906]
 
-mean value: 0.7885187455634349
+mean value: 0.7828441168174185
 
 key: test_recall 
-value: [0.6 1.  nan 0.6 1.  0.6 0.8 0.8 0.8 1. ]
+value: [0.8 0.8 nan 0.6 1.  1.  0.4 0.8 0.8 0.8]
 
 mean value: nan
 
 key: train_recall 
-value: [0.97777778 0.93333333 0.82222222 0.97777778 0.93333333 0.93333333
- 0.88888889 0.88888889 0.8        0.93333333]
+value: [0.93333333 0.93333333 0.86666667 0.95555556 0.88888889 0.88888889
+ 0.88888889 0.91111111 0.82222222 0.86666667]
 
-mean value: 0.9088888888888889
+mean value: 0.8955555555555555
 
 key: test_roc_auc 
-value: [0.4 0.6 nan 0.6 0.7 0.4 0.7 0.5 0.6 0.7]
+value: [0.5 0.7 nan 0.6 0.7 0.7 0.5 0.7 0.6 0.6]
 
 mean value: nan
 
 key: train_roc_auc 
-value: [0.73333333 0.82222222 0.85555556 0.92222222 0.75555556 0.78888889
- 0.91111111 0.75555556 0.87777778 0.77777778]
+value: [0.68888889 0.82222222 0.84444444 0.91111111 0.8        0.77777778
+ 0.86666667 0.81111111 0.86666667 0.77777778]
 
-mean value: 0.8200000000000001
+mean value: 0.8166666666666667
 
 key: test_jcc 
-value: [0.33333333 0.55555556        nan 0.42857143 0.625      0.33333333
- 0.57142857 0.44444444 0.5        0.625     ]
+value: [0.44444444 0.57142857        nan 0.42857143 0.625      0.625
+ 0.28571429 0.57142857 0.5        0.5       ]
 
 mean value: nan
 
 key: train_jcc 
-value: [0.64705882 0.72413793 0.74       0.8627451  0.65625    0.68852459
- 0.83333333 0.64516129 0.76595745 0.67741935]
+value: [0.6        0.72413793 0.73584906 0.84313725 0.68965517 0.66666667
+ 0.76923077 0.70689655 0.75510204 0.66101695]
 
-mean value: 0.7240587868070179
+mean value: 0.7151692392544453
 
-MCC on Blind test: 0.31
+MCC on Blind test: 0.07
 
-Accuracy on Blind test: 0.65
+Accuracy on Blind test: 0.52
 
 Model_name: Naive Bayes 
 Model func: BernoulliNB() 
@@ -12251,101 +12288,101 @@ Pipeline(steps=[('prep',
                 ('model', BernoulliNB())])
 
 key: fit_time 
-value: [0.00987244 0.00975513 0.00979519 0.00990796 0.00982952 0.00979829
- 0.00977135 0.00987482 0.00979662 0.01000142]
+value: [0.00936604 0.00989652 0.00981975 0.00979877 0.00879645 0.00878477
+ 0.00904775 0.0089612  0.00923443 0.00946689]
 
-mean value: 0.009840273857116699
+mean value: 0.009317255020141602
 
 key: score_time 
-value: [0.00938535 0.00935745 0.00481558 0.00939775 0.00938678 0.00941658
- 0.00940442 0.00948787 0.00941515 0.00943661]
+value: [0.0095799  0.00932693 0.0049901  0.00928187 0.00873828 0.00865293
+ 0.00907826 0.0087533  0.00945044 0.00921941]
 
-mean value: 0.008950352668762207
+mean value: 0.008707141876220703
 
 key: test_mcc 
-value: [0.33333333 0.21821789        nan 0.         0.6        0.5
- 0.5        0.40824829 0.2        0.6       ]
+value: [0.65465367 0.21821789        nan 0.         0.40824829 0.65465367
+ 0.21821789 0.         0.40824829 0.40824829]
 
 mean value: nan
 
 key: train_mcc 
-value: [0.71269665 0.64700558 0.53452248 0.68041382 0.62609903 0.58137767
- 0.58137767 0.62609903 0.69509522 0.53452248]
+value: [0.57906602 0.73624773 0.57906602 0.60540551 0.60238451 0.56056066
+ 0.64700558 0.67082039 0.62609903 0.64700558]
 
-mean value: 0.6219209651318979
+mean value: 0.6253661066190971
 
 key: test_accuracy 
-value: [0.6 0.6 nan 0.5 0.8 0.7 0.7 0.7 0.6 0.8]
+value: [0.8 0.6 nan 0.5 0.7 0.8 0.6 0.5 0.7 0.7]
 
 mean value: nan
 
 key: train_accuracy 
-value: [0.85555556 0.82222222 0.76666667 0.83333333 0.81111111 0.78888889
- 0.78888889 0.81111111 0.84444444 0.76666667]
+value: [0.78888889 0.86666667 0.78888889 0.8        0.8        0.77777778
+ 0.82222222 0.83333333 0.81111111 0.82222222]
 
-mean value: 0.8088888888888889
+mean value: 0.8111111111111111
 
 key: test_fscore 
-value: [0.33333333 0.66666667        nan 0.54545455 0.8        0.57142857
- 0.57142857 0.72727273 0.6        0.8       ]
+value: [0.75       0.66666667        nan 0.54545455 0.72727273 0.75
+ 0.5        0.44444444 0.66666667 0.66666667]
 
 mean value: nan
 
 key: train_fscore 
-value: [0.85057471 0.81395349 0.75862069 0.81481481 0.8        0.77647059
- 0.77647059 0.8        0.83333333 0.75862069]
+value: [0.7816092  0.86046512 0.7816092  0.78571429 0.79069767 0.76190476
+ 0.81395349 0.82352941 0.8        0.81395349]
 
-mean value: 0.7982858904944852
+mean value: 0.8013436617630212
 
 key: test_precision 
-value: [1.         0.57142857        nan 0.5        0.8        1.
- 1.         0.66666667 0.6        0.8       ]
+value: [1.         0.57142857        nan 0.5        0.66666667 1.
+ 0.66666667 0.5        0.75       0.75      ]
 
 mean value: nan
 
 key: train_precision 
-value: [0.88095238 0.85365854 0.78571429 0.91666667 0.85       0.825
- 0.825      0.85       0.8974359  0.78571429]
+value: [0.80952381 0.90243902 0.80952381 0.84615385 0.82926829 0.82051282
+ 0.85365854 0.875      0.85       0.85365854]
 
-mean value: 0.8470142053068882
+mean value: 0.8449738675958188
 
 key: test_recall 
-value: [0.2 0.8 nan 0.6 0.8 0.4 0.4 0.8 0.6 0.8]
+value: [0.6 0.8 nan 0.6 0.8 0.6 0.4 0.4 0.6 0.6]
 
 mean value: nan
 
 key: train_recall 
-value: [0.82222222 0.77777778 0.73333333 0.73333333 0.75555556 0.73333333
- 0.73333333 0.75555556 0.77777778 0.73333333]
+value: [0.75555556 0.82222222 0.75555556 0.73333333 0.75555556 0.71111111
+ 0.77777778 0.77777778 0.75555556 0.77777778]
 
-mean value: 0.7555555555555555
+mean value: 0.7622222222222222
 
 key: test_roc_auc 
-value: [0.6 0.6 nan 0.5 0.8 0.7 0.7 0.7 0.6 0.8]
+value: [0.8 0.6 nan 0.5 0.7 0.8 0.6 0.5 0.7 0.7]
 
 mean value: nan
 
 key: train_roc_auc 
-value: [0.85555556 0.82222222 0.76666667 0.83333333 0.81111111 0.78888889
- 0.78888889 0.81111111 0.84444444 0.76666667]
+value: [0.78888889 0.86666667 0.78888889 0.8        0.8        0.77777778
+ 0.82222222 0.83333333 0.81111111 0.82222222]
 
-mean value: 0.8088888888888889
+mean value: 0.8111111111111111
 
 key: test_jcc 
-value: [0.2        0.5               nan 0.375      0.66666667 0.4
- 0.4        0.57142857 0.42857143 0.66666667]
+value: [0.6        0.5               nan 0.375      0.57142857 0.6
+ 0.33333333 0.28571429 0.5        0.5       ]
 
 mean value: nan
 
 key: train_jcc 
-value: [0.74       0.68627451 0.61111111 0.6875     0.66666667 0.63461538
- 0.63461538 0.66666667 0.71428571 0.61111111]
+value: [0.64150943 0.75510204 0.64150943 0.64705882 0.65384615 0.61538462
+ 0.68627451 0.7        0.66666667 0.68627451]
 
-mean value: 0.665284654887596
+mean value: 0.6693626187775545
 
-MCC on Blind test: 0.12
+MCC on Blind test: 0.18
 
-Accuracy on Blind test: 0.6
+Accuracy on Blind test: 0.65
 
 Model_name: K-Nearest Neighbors 
 Model func: KNeighborsClassifier() 
@@ -12378,66 +12415,64 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', KNeighborsClassifier())])
 
 key: fit_time 
-value: [0.00961757 0.00889277 0.009341   0.00934768 0.00927901 0.0093441
- 0.00930309 0.00911593 0.00942564 0.00927877]
+value: [0.00885844 0.00982785 0.00806379 0.00800562 0.00807977 0.00805044
+ 0.00798988 0.00812101 0.00803423 0.00798821]
 
-mean value: 0.009294557571411132
+mean value: 0.008301925659179688
 
 key: score_time 
-value: [0.01543903 0.010463   0.00477624 0.01466155 0.01007581 0.01023936
- 0.01033044 0.00999165 0.01001239 0.01013422]
+value: [0.01715279 0.01941204 0.00412107 0.00909305 0.00904918 0.00910521
+ 0.01407719 0.01381826 0.01290107 0.01411104]
 
-mean value: 0.0106123685836792
+mean value: 0.012284088134765624
 
 key: test_mcc 
-value: [ 0.40824829  0.65465367         nan  0.2         0.81649658  0.
-  0.2         0.21821789 -0.21821789  0.2       ]
+value: [ 0.81649658  0.6                nan  0.40824829  0.40824829  0.
+  0.40824829  0.         -0.33333333  0.        ]
 
 mean value: nan
 
 key: train_mcc 
-value: [0.57792049 0.51161666 0.51161666 0.60238451 0.64700558 0.58137767
- 0.64508188 0.60059347 0.62237591 0.51161666]
+value: [0.6        0.62237591 0.66683134 0.60059347 0.67082039 0.53990552
+ 0.64444444 0.57906602 0.71128676 0.51111111]
 
-mean value: 0.5811589508465446
+mean value: 0.6146434979838011
 
 key: test_accuracy 
-value: [0.7 0.8 nan 0.6 0.9 0.5 0.6 0.6 0.4 0.6]
+value: [0.9 0.8 nan 0.7 0.7 0.5 0.7 0.5 0.4 0.5]
 
 mean value: nan
 
 key: train_accuracy 
-value: [0.78888889 0.75555556 0.75555556 0.8        0.82222222 0.78888889
- 0.82222222 0.8        0.81111111 0.75555556]
+value: [0.8        0.81111111 0.83333333 0.8        0.83333333 0.76666667
+ 0.82222222 0.78888889 0.85555556 0.75555556]
 
-mean value: 0.79
+mean value: 0.8066666666666666
 
 key: test_fscore 
-value: [0.66666667 0.75              nan 0.6        0.90909091 0.44444444
- 0.6        0.66666667 0.5        0.6       ]
+value: [0.88888889 0.8               nan 0.66666667 0.72727273 0.44444444
+ 0.66666667 0.54545455 0.57142857 0.44444444]
 
 mean value: nan
 
 key: train_fscore 
-value: [0.79120879 0.75       0.75       0.79069767 0.82978723 0.8
- 0.81818182 0.79545455 0.80898876 0.75      ]
+value: [0.8        0.81318681 0.83516484 0.79545455 0.84210526 0.78350515
+ 0.82222222 0.7816092  0.85714286 0.75555556]
 
-mean value: 0.7884318827351257
+mean value: 0.8085946441926197
 
 key: test_precision 
-value: [0.75       1.                nan 0.6        0.83333333 0.5
- 0.6        0.57142857 0.42857143 0.6       ]
+value: [1.         0.8               nan 0.75       0.66666667 0.5
+ 0.75       0.5        0.44444444 0.5       ]
 
 mean value: nan
 
 key: train_precision 
-value: [0.7826087  0.76744186 0.76744186 0.82926829 0.79591837 0.76
- 0.8372093  0.81395349 0.81818182 0.76744186]
+value: [0.8        0.80434783 0.82608696 0.81395349 0.8        0.73076923
+ 0.82222222 0.80952381 0.84782609 0.75555556]
 
-mean value: 0.7939465545956881
-
-key: test_recall 
-value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+mean value: 0.8010285176008128
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
 Traceback (most recent call last):
   File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
     return cache[method]
@@ -12485,42 +12520,44 @@ Traceback (most recent call last):
 ValueError: Found unknown categories ['XDR'] in column 5 during transform
 
   warnings.warn(
-[0.6 0.6 nan 0.6 1.  0.4 0.6 0.8 0.6 0.6]
+
+key: test_recall 
+value: [0.8 0.8 nan 0.6 0.8 0.4 0.6 0.6 0.8 0.4]
 
 mean value: nan
 
 key: train_recall 
-value: [0.8        0.73333333 0.73333333 0.75555556 0.86666667 0.84444444
- 0.8        0.77777778 0.8        0.73333333]
+value: [0.8        0.82222222 0.84444444 0.77777778 0.88888889 0.84444444
+ 0.82222222 0.75555556 0.86666667 0.75555556]
 
-mean value: 0.7844444444444445
+mean value: 0.8177777777777777
 
 key: test_roc_auc 
-value: [0.7 0.8 nan 0.6 0.9 0.5 0.6 0.6 0.4 0.6]
+value: [0.9 0.8 nan 0.7 0.7 0.5 0.7 0.5 0.4 0.5]
 
 mean value: nan
 
 key: train_roc_auc 
-value: [0.78888889 0.75555556 0.75555556 0.8        0.82222222 0.78888889
- 0.82222222 0.8        0.81111111 0.75555556]
+value: [0.8        0.81111111 0.83333333 0.8        0.83333333 0.76666667
+ 0.82222222 0.78888889 0.85555556 0.75555556]
 
-mean value: 0.79
+mean value: 0.8066666666666666
 
 key: test_jcc 
-value: [0.5        0.6               nan 0.42857143 0.83333333 0.28571429
- 0.42857143 0.5        0.33333333 0.42857143]
+value: [0.8        0.66666667        nan 0.5        0.57142857 0.28571429
+ 0.5        0.375      0.4        0.28571429]
 
 mean value: nan
 
 key: train_jcc 
-value: [0.65454545 0.6        0.6        0.65384615 0.70909091 0.66666667
- 0.69230769 0.66037736 0.67924528 0.6       ]
+value: [0.66666667 0.68518519 0.71698113 0.66037736 0.72727273 0.6440678
+ 0.69811321 0.64150943 0.75       0.60714286]
 
-mean value: 0.651607951796631
+mean value: 0.6797316364953078
 
-MCC on Blind test: 0.08
+MCC on Blind test: 0.0
 
-Accuracy on Blind test: 0.57
+Accuracy on Blind test: 0.5
 
 Model_name: SVM 
 Model func: SVC(random_state=42) 
@@ -12553,101 +12590,101 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', SVC(random_state=42))])
 
 key: fit_time 
-value: [0.00917888 0.00928307 0.0092535  0.00920248 0.00919914 0.00918078
- 0.00919771 0.00921726 0.00927758 0.00922322]
+value: [0.0089519  0.0091002  0.00914049 0.00959015 0.01245856 0.01036382
+ 0.01422167 0.01016212 0.01498318 0.01038194]
 
-mean value: 0.009221363067626952
+mean value: 0.010935401916503907
 
 key: score_time 
-value: [0.00861764 0.0091691  0.0043776  0.00870633 0.008672   0.0087018
- 0.00878572 0.00866151 0.00867128 0.008708  ]
+value: [0.00858164 0.00850725 0.00424981 0.01098704 0.01503611 0.00963306
+ 0.01035309 0.00957203 0.01383162 0.0097847 ]
 
-mean value: 0.008307099342346191
+mean value: 0.010053634643554688
 
 key: test_mcc 
-value: [0.2        0.6               nan 0.40824829 0.81649658 0.65465367
- 0.2        0.21821789 0.21821789 1.        ]
+value: [0.40824829 0.6               nan 0.40824829 0.65465367 1.
+ 0.40824829 0.21821789 0.21821789 0.65465367]
 
 mean value: nan
 
 key: train_mcc 
-value: [0.84632727 0.80498447 0.82548988 0.87447463 0.82222222 0.80498447
- 0.91473203 0.87011096 0.84970583 0.82548988]
+value: [0.86666667 0.88910845 0.91201231 0.80178373 0.8675239  0.86666667
+ 0.91201231 0.84632727 0.8675239  0.80178373]
 
-mean value: 0.8438521657272541
+mean value: 0.8631408926213697
 
 key: test_accuracy 
-value: [0.6 0.8 nan 0.7 0.9 0.8 0.6 0.6 0.6 1. ]
+value: [0.7 0.8 nan 0.7 0.8 1.  0.7 0.6 0.6 0.8]
 
 mean value: nan
 
 key: train_accuracy 
-value: [0.92222222 0.9        0.91111111 0.93333333 0.91111111 0.9
- 0.95555556 0.93333333 0.92222222 0.91111111]
+value: [0.93333333 0.94444444 0.95555556 0.9        0.93333333 0.93333333
+ 0.95555556 0.92222222 0.93333333 0.9       ]
 
-mean value: 0.92
+mean value: 0.9311111111111111
 
 key: test_fscore 
-value: [0.6        0.8               nan 0.66666667 0.90909091 0.75
- 0.6        0.66666667 0.66666667 1.        ]
+value: [0.72727273 0.8               nan 0.66666667 0.83333333 1.
+ 0.66666667 0.66666667 0.66666667 0.75      ]
 
 mean value: nan
 
 key: train_fscore 
-value: [0.91954023 0.89411765 0.90697674 0.92857143 0.91111111 0.89411765
- 0.95348837 0.93023256 0.91764706 0.90697674]
+value: [0.93333333 0.94382022 0.95454545 0.89655172 0.93181818 0.93333333
+ 0.95652174 0.91954023 0.93181818 0.89655172]
 
-mean value: 0.9162779541113425
+mean value: 0.929783412685894
 
 key: test_precision 
-value: [0.6        0.8               nan 0.75       0.83333333 1.
- 0.6        0.57142857 0.57142857 1.        ]
+value: [0.66666667 0.8               nan 0.75       0.71428571 1.
+ 0.75       0.57142857 0.57142857 1.        ]
 
 mean value: nan
 
 key: train_precision 
-value: [0.95238095 0.95       0.95121951 1.         0.91111111 0.95
- 1.         0.97560976 0.975      0.95121951]
+value: [0.93333333 0.95454545 0.97674419 0.92857143 0.95348837 0.93333333
+ 0.93617021 0.95238095 0.95348837 0.92857143]
 
-mean value: 0.9616540843979868
+mean value: 0.9450627073734447
 
 key: test_recall 
-value: [0.6 0.8 nan 0.6 1.  0.6 0.6 0.8 0.8 1. ]
+value: [0.8 0.8 nan 0.6 1.  1.  0.6 0.8 0.8 0.6]
 
 mean value: nan
 
 key: train_recall 
-value: [0.88888889 0.84444444 0.86666667 0.86666667 0.91111111 0.84444444
- 0.91111111 0.88888889 0.86666667 0.86666667]
+value: [0.93333333 0.93333333 0.93333333 0.86666667 0.91111111 0.93333333
+ 0.97777778 0.88888889 0.91111111 0.86666667]
 
-mean value: 0.8755555555555555
+mean value: 0.9155555555555556
 
 key: test_roc_auc 
-value: [0.6 0.8 nan 0.7 0.9 0.8 0.6 0.6 0.6 1. ]
+value: [0.7 0.8 nan 0.7 0.8 1.  0.7 0.6 0.6 0.8]
 
 mean value: nan
 
 key: train_roc_auc 
-value: [0.92222222 0.9        0.91111111 0.93333333 0.91111111 0.9
- 0.95555556 0.93333333 0.92222222 0.91111111]
+value: [0.93333333 0.94444444 0.95555556 0.9        0.93333333 0.93333333
+ 0.95555556 0.92222222 0.93333333 0.9       ]
 
-mean value: 0.92
+mean value: 0.9311111111111111
 
 key: test_jcc 
-value: [0.42857143 0.66666667        nan 0.5        0.83333333 0.6
- 0.42857143 0.5        0.5        1.        ]
+value: [0.57142857 0.66666667        nan 0.5        0.71428571 1.
+ 0.5        0.5        0.5        0.6       ]
 
 mean value: nan
 
 key: train_jcc 
-value: [0.85106383 0.80851064 0.82978723 0.86666667 0.83673469 0.80851064
- 0.91111111 0.86956522 0.84782609 0.82978723]
+value: [0.875      0.89361702 0.91304348 0.8125     0.87234043 0.875
+ 0.91666667 0.85106383 0.87234043 0.8125    ]
 
-mean value: 0.845956335047124
+mean value: 0.8694071847055196
 
-MCC on Blind test: 0.37
+MCC on Blind test: 0.21
 
-Accuracy on Blind test: 0.72
+Accuracy on Blind test: 0.65
 
 Model_name: MLP 
 Model func: MLPClassifier(max_iter=500, random_state=42) 
@@ -12728,20 +12765,20 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', MLPClassifier(max_iter=500, random_state=42))])
 
 key: fit_time 
-value: [0.36695099 0.50412679 0.45356798 0.3968966  0.37350249 0.40489578
- 0.58160663 0.40739989 0.36945391 0.39499092]
+value: [0.44935966 0.68770981 0.63709402 0.90934205 1.31588888 0.48195362
+ 0.40868592 0.56449747 0.35664296 0.61879086]
 
-mean value: 0.42533919811248777
+mean value: 0.6429965257644653
 
 key: score_time 
-value: [0.01202917 0.01205993 0.00681949 0.01205707 0.01209044 0.0119729
- 0.01197028 0.0120163  0.01199508 0.01207495]
+value: [0.01234674 0.01238608 0.00677371 0.01211309 0.01210904 0.01201606
+ 0.01210451 0.01206374 0.01207805 0.0169487 ]
 
-mean value: 0.011508560180664063
+mean value: 0.01209397315979004
 
 key: test_mcc 
-value: [0.         0.81649658        nan 0.40824829 0.6        0.81649658
- 0.81649658 0.5        0.65465367 0.81649658]
+value: [0.6        0.81649658        nan 0.2        0.81649658 0.81649658
+ 0.40824829 0.65465367 0.21821789 0.81649658]
 
 mean value: nan
 
@@ -12751,7 +12788,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_accuracy 
-value: [0.5 0.9 nan 0.7 0.8 0.9 0.9 0.7 0.8 0.9]
+value: [0.8 0.9 nan 0.6 0.9 0.9 0.7 0.8 0.6 0.9]
 
 mean value: nan
 
@@ -12761,8 +12798,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_fscore 
-value: [0.28571429 0.90909091        nan 0.72727273 0.8        0.88888889
- 0.90909091 0.76923077 0.83333333 0.90909091]
+value: [0.8        0.90909091        nan 0.6        0.90909091 0.88888889
+ 0.66666667 0.83333333 0.66666667 0.90909091]
 
 mean value: nan
 
@@ -12772,8 +12809,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_precision 
-value: [0.5        0.83333333        nan 0.66666667 0.8        1.
- 0.83333333 0.625      0.71428571 0.83333333]
+value: [0.8        0.83333333        nan 0.6        0.83333333 1.
+ 0.75       0.71428571 0.57142857 0.83333333]
 
 mean value: nan
 
@@ -12783,7 +12820,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_recall 
-value: [0.2 1.  nan 0.8 0.8 0.8 1.  1.  1.  1. ]
+value: [0.8 1.  nan 0.6 1.  0.8 0.6 1.  0.8 1. ]
 
 mean value: nan
 
@@ -12793,7 +12830,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_roc_auc 
-value: [0.5 0.9 nan 0.7 0.8 0.9 0.9 0.7 0.8 0.9]
+value: [0.8 0.9 nan 0.6 0.9 0.9 0.7 0.8 0.6 0.9]
 
 mean value: nan
 
@@ -12803,8 +12840,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_jcc 
-value: [0.16666667 0.83333333        nan 0.57142857 0.66666667 0.8
- 0.83333333 0.625      0.71428571 0.83333333]
+value: [0.66666667 0.83333333        nan 0.42857143 0.83333333 0.8
+ 0.5        0.71428571 0.5        0.83333333]
 
 mean value: nan
 
@@ -12944,544 +12981,19 @@ Pipeline(steps=[('prep',
                 ('model', DecisionTreeClassifier(random_state=42))])
 
 key: fit_time 
-value: [0.01837254 0.01201224 0.01022887 0.01003718 0.00986743 0.00974107
- 0.00972939 0.00980163 0.00968575 0.00952888]
+value: [0.01760101 0.01270008 0.010602   0.01664591 0.01388907 0.0104475
+ 0.01092863 0.01405859 0.01037669 0.01104903]
 
-mean value: 0.010900497436523438
+mean value: 0.012829852104187012
 
 key: score_time 
-value: [0.01520014 0.0088861  0.00448728 0.00853777 0.00867915 0.00838041
- 0.00833416 0.0084734  0.00840139 0.00852203]
+value: [0.01295376 0.00925541 0.00528264 0.01082516 0.01204586 0.00963569
+ 0.01535797 0.00973344 0.00954556 0.00905228]
 
-mean value: 0.008790183067321777
+mean value: 0.010368776321411134
 
 key: test_mcc 
-value: [0.6        0.81649658        nan 0.6        0.81649658 0.81649658
- 1.         0.21821789 0.65465367 0.81649658]
-
-mean value: nan
-
-key: train_mcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_accuracy 
-value: [0.8 0.9 nan 0.8 0.9 0.9 1.  0.6 0.8 0.9]
-
-mean value: nan
-
-key: train_accuracy 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_fscore 
-value: [0.8        0.90909091        nan 0.8        0.90909091 0.88888889
- 1.         0.66666667 0.83333333 0.88888889]
-
-mean value: nan
-
-key: train_fscore 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_precision 
-value: [0.8        0.83333333        nan 0.8        0.83333333 1.
- 1.         0.57142857 0.71428571 1.        ]
-
-mean value: nan
-
-key: train_precision 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_recall 
-value: [0.8 1.  nan 0.8 1.  0.8 1.  0.8 1.  0.8]
-
-mean value: nan
-
-key: train_recall 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_roc_auc 
-value: [0.8 0.9 nan 0.8 0.9 0.9 1.  0.6 0.8 0.9]
-
-mean value: nan
-
-key: train_roc_auc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_jcc 
-value: [0.66666667 0.83333333        nan 0.66666667 0.83333333 0.8
- 1.         0.5        0.71428571 0.8       ]
-
-mean value: nan
-
-key: train_jcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-MCC on Blind test: 0.89
-
-Accuracy on Blind test: 0.95
-
-Model_name: Extra Trees 
-Model func: ExtraTreesClassifier(random_state=42) 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
-              colsample_bynode=None, colsample_bytree=None,
-              enable_categorical=False, gamma=None, gpu_id=None,
-              importance_type=None, interaction_constraints=None,
-              learning_rate=None, max_delta_step=None, max_depth=None,
-              min_child_weight=None, missing=nan, monotone_constraints=None,
-              n_estimators=100, n_jobs=None, num_parallel_tree=None,
-              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
-              scale_pos_weight=None, subsample=None, tree_method=None,
-              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model', ExtraTreesClassifier(random_state=42))])
-
-key: fit_time 
-value: [0.08040762 0.08057427 0.08129692 0.08119392 0.08340263 0.08066964
- 0.08185148 0.08536839 0.0816021  0.08244801]
-
-mean value: 0.08188149929046631
-
-key: score_time 
-value: [0.01654506 0.01672173 0.0045855  0.01680946 0.0167408  0.01680946
- 0.01687789 0.02278209 0.01735115 0.01731062]
-
-mean value: 0.01625337600708008
-
-key: test_mcc 
-value: [0.65465367 0.40824829        nan 0.65465367 0.81649658 0.6
- 0.81649658 0.5        0.5        1.        ]
-
-mean value: nan
-
-key: train_mcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_accuracy 
-value: [0.8 0.7 nan 0.8 0.9 0.8 0.9 0.7 0.7 1. ]
-
-mean value: nan
-
-key: train_accuracy 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_fscore 
-value: [0.75       0.72727273        nan 0.75       0.90909091 0.8
- 0.90909091 0.76923077 0.76923077 1.        ]
-
-mean value: nan
-
-key: train_fscore 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_precision 
-value: [1.         0.66666667        nan 1.         0.83333333 0.8
- 0.83333333 0.625      0.625      1.        ]
-
-mean value: nan
-
-key: train_precision 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_recall 
-value: [0.6 0.8 nan 0.6 1.  0.8 1.  1.  1.  1. ]
-
-mean value: nan
-
-key: train_recall 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_roc_auc 
-value: [0.8 0.7 nan 0.8 0.9 0.8 0.9 0.7 0.7 1. ]
-
-mean value: nan
-
-key: train_roc_auc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_jcc 
-value: [0.6        0.57142857        nan 0.6        0.83333333 0.66666667
- 0.83333333 0.625      0.625      1.        ]
-
-mean value: nan
-
-key: train_jcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-MCC on Blind test: 0.36
-
-Accuracy on Blind test: 0.72
-
-Model_name: Extra Tree 
-Model func: ExtraTreeClassifier(random_state=42) 
-List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
-              colsample_bynode=None, colsample_bytree=None,
-              enable_categorical=False, gamma=None, gpu_id=None,
-              importance_type=None, interaction_constraints=None,
-              learning_rate=None, max_delta_step=None, max_depth=None,
-              min_child_weight=None, missing=nan, monotone_constraints=None,
-              n_estimators=100, n_jobs=None, num_parallel_tree=None,
-              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
-              scale_pos_weight=None, subsample=None, tree_method=None,
-              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model', ExtraTreeClassifier(random_state=42))])
-
-key: fit_time 
-value: [0.00936699 0.00906682 0.00868106 0.00933576 0.00893283 0.00876093
- 0.00901222 0.00940871 0.00933743 0.00886393]
-
-mean value: 0.009076666831970216
-
-key: score_time 
-value: [0.00862956 0.00860763 0.00448084 0.00889158 0.00875568 0.00860119
- 0.00875163 0.0087719  0.00861621 0.00862575]
-
-mean value: 0.00827319622039795
-
-key: test_mcc 
-value: [0.21821789 0.40824829        nan 0.81649658 0.81649658 0.81649658
- 0.65465367 0.40824829 0.65465367 0.5       ]
-
-mean value: nan
-
-key: train_mcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_accuracy 
-value: [0.6 0.7 nan 0.9 0.9 0.9 0.8 0.7 0.8 0.7]
-
-mean value: nan
-
-key: train_accuracy 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_fscore 
-value: [0.5        0.72727273        nan 0.90909091 0.88888889 0.88888889
- 0.83333333 0.72727273 0.83333333 0.76923077]
-
-mean value: nan
-
-key: train_fscore 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_precision 
-value: [0.66666667 0.66666667        nan 0.83333333 1.         1.
- 0.71428571 0.66666667 0.71428571 0.625     ]
-
-mean value: nan
-
-key: train_precision 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_recall 
-value: [0.4 0.8 nan 1.  0.8 0.8 1.  0.8 1.  1. ]
-
-mean value: nan
-
-key: train_recall 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_roc_auc 
-value: [0.6 0.7 nan 0.9 0.9 0.9 0.8 0.7 0.8 0.7]
-
-mean value: nan
-
-key: train_roc_auc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_jcc 
-value: [0.33333333 0.57142857        nan 0.83333333 0.8        0.8
- 0.71428571 0.57142857 0.71428571 0.625     ]
-
-mean value: nan
-
-key: train_jcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-MCC on Blind test: -0.04
-
-Accuracy on Blind test: 0.48
-
-Model_name: Random Forest 
-Model func: RandomForestClassifier(n_estimators=1000, random_state=42) 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
-              colsample_bynode=None, colsample_bytree=None,
-              enable_categorical=False, gamma=None, gpu_id=None,
-              importance_type=None, interaction_constraints=None,
-              learning_rate=None, max_delta_step=None, max_depth=None,
-              min_child_weight=None, missing=nan, monotone_constraints=None,
-              n_estimators=100, n_jobs=None, num_parallel_tree=None,
-              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
-              scale_pos_weight=None, subsample=None, tree_method=None,
-              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
-  warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
-  warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
-  warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
-  warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
-  warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
-  warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
-  warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
-  warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
-  warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
-  warn(
-Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model',
-                 RandomForestClassifier(n_estimators=1000, random_state=42))])
-
-key: fit_time 
-value: [1.02382302 1.05082822 1.03594112 1.02103114 1.04056716 1.02298737
- 1.02116394 1.020087   1.01779222 1.02446771]
-
-mean value: 1.027868890762329
-
-key: score_time 
-value: [0.16805339 0.09164691 0.00471544 0.08566546 0.09267664 0.08607197
- 0.09215879 0.09033847 0.09076619 0.08630776]
-
-mean value: 0.08884010314941407
-
-key: test_mcc 
-value: [0.65465367 0.65465367        nan 0.65465367 0.81649658 0.81649658
+value: [0.81649658 0.81649658        nan 0.81649658 1.         0.81649658
  1.         0.21821789 0.65465367 1.        ]
 
 mean value: nan
@@ -13492,7 +13004,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_accuracy 
-value: [0.8 0.8 nan 0.8 0.9 0.9 1.  0.6 0.8 1. ]
+value: [0.9 0.9 nan 0.9 1.  0.9 1.  0.6 0.8 1. ]
 
 mean value: nan
 
@@ -13502,7 +13014,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_fscore 
-value: [0.75       0.83333333        nan 0.75       0.90909091 0.88888889
+value: [0.90909091 0.90909091        nan 0.88888889 1.         0.88888889
  1.         0.66666667 0.83333333 1.        ]
 
 mean value: nan
@@ -13513,7 +13025,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_precision 
-value: [1.         0.71428571        nan 1.         0.83333333 1.
+value: [0.83333333 0.83333333        nan 1.         1.         1.
  1.         0.57142857 0.71428571 1.        ]
 
 mean value: nan
@@ -13524,7 +13036,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_recall 
-value: [0.6 1.  nan 0.6 1.  0.8 1.  0.8 1.  1. ]
+value: [1.  1.  nan 0.8 1.  0.8 1.  0.8 1.  1. ]
 
 mean value: nan
 
@@ -13534,7 +13046,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_roc_auc 
-value: [0.8 0.8 nan 0.8 0.9 0.9 1.  0.6 0.8 1. ]
+value: [0.9 0.9 nan 0.9 1.  0.9 1.  0.6 0.8 1. ]
 
 mean value: nan
 
@@ -13544,7 +13056,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_jcc 
-value: [0.6        0.71428571        nan 0.6        0.83333333 0.8
+value: [0.83333333 0.83333333        nan 0.8        1.         0.8
  1.         0.5        0.71428571 1.        ]
 
 mean value: nan
@@ -13554,4439 +13066,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 
 mean value: 1.0
 
-MCC on Blind test: 0.48
-
-Accuracy on Blind test: 0.78
-
-Model_name: Random Forest2 
-Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42) 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
-              colsample_bynode=None, colsample_bytree=None,
-              enable_categorical=False, gamma=None, gpu_id=None,
-              importance_type=None, interaction_constraints=None,
-              learning_rate=None, max_delta_step=None, max_depth=None,
-              min_child_weight=None, missing=nan, monotone_constraints=None,
-              n_estimators=100, n_jobs=None, num_parallel_tree=None,
-              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
-              scale_pos_weight=None, subsample=None, tree_method=None,
-              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model',
-                 RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                                        n_estimators=1000, n_jobs=10,
-                                        oob_score=True, random_state=42))])
-
-key: fit_time 
-value: [0.79851103 0.84037805 0.94003201 0.90074062 0.83170342 0.8777082
- 0.84478021 0.83035374 0.87192893 0.84235287]
-
-mean value: 0.8578489065170288
-
-key: score_time 
-value: [0.18074512 0.13748908 0.00518966 0.15447927 0.22545338 0.22390962
- 0.19519472 0.20300436 0.18351603 0.17219543]
-
-mean value: 0.16811766624450683
-
-key: test_mcc 
-value: [0.6        0.81649658        nan 0.65465367 1.         0.65465367
- 0.81649658 0.21821789 0.81649658 1.        ]
-
-mean value: nan
-
-key: train_mcc 
-value: [0.97801929 1.         1.         1.         0.97801929 0.95650071
- 0.97801929 1.         1.         1.        ]
-
-mean value: 0.9890558596126232
-
-key: test_accuracy 
-value: [0.8 0.9 nan 0.8 1.  0.8 0.9 0.6 0.9 1. ]
-
-mean value: nan
-
-key: train_accuracy 
-value: [0.98888889 1.         1.         1.         0.98888889 0.97777778
- 0.98888889 1.         1.         1.        ]
-
-mean value: 0.9944444444444445
-
-key: test_fscore 
-value: [0.8        0.90909091        nan 0.75       1.         0.75
- 0.88888889 0.66666667 0.90909091 1.        ]
-
-mean value: nan
-
-key: train_fscore 
-value: [0.98876404 1.         1.         1.         0.98901099 0.97727273
- 0.98876404 1.         1.         1.        ]
-
-mean value: 0.9943811806171357
-
-key: test_precision 
-value: [0.8        0.83333333        nan 1.         1.         1.
- 1.         0.57142857 0.83333333 1.        ]
-
-mean value: nan
-
-key: train_precision 
-value: [1.         1.         1.         1.         0.97826087 1.
- 1.         1.         1.         1.        ]
-
-mean value: 0.9978260869565218
-
-key: test_recall 
-value: [0.8 1.  nan 0.6 1.  0.6 0.8 0.8 1.  1. ]
-
-mean value: nan
-
-key: train_recall 
-value: [0.97777778 1.         1.         1.         1.         0.95555556
- 0.97777778 1.         1.         1.        ]
-
-mean value: 0.991111111111111
-
-key: test_roc_auc 
-value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
-  warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-[0.8 0.9 nan 0.8 1.  0.8 0.9 0.6 0.9 1. ]
-
-mean value: nan
-
-key: train_roc_auc 
-value: [0.98888889 1.         1.         1.         0.98888889 0.97777778
- 0.98888889 1.         1.         1.        ]
-
-mean value: 0.9944444444444445
-
-key: test_jcc 
-value: [0.66666667 0.83333333        nan 0.6        1.         0.6
- 0.8        0.5        0.83333333 1.        ]
-
-mean value: nan
-
-key: train_jcc 
-value: [0.97777778 1.         1.         1.         0.97826087 0.95555556
- 0.97777778 1.         1.         1.        ]
-
-mean value: 0.9889371980676328
-
-MCC on Blind test: 0.6
-
-Accuracy on Blind test: 0.82
-
-Model_name: Naive Bayes 
-Model func: BernoulliNB() 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
-              colsample_bynode=None, colsample_bytree=None,
-              enable_categorical=False, gamma=None, gpu_id=None,
-              importance_type=None, interaction_constraints=None,
-              learning_rate=None, max_delta_step=None, max_depth=None,
-              min_child_weight=None, missing=nan, monotone_constraints=None,
-              n_estimators=100, n_jobs=None, num_parallel_tree=None,
-              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
-              scale_pos_weight=None, subsample=None, tree_method=None,
-              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model', BernoulliNB())])
-
-key: fit_time 
-value: [0.02122736 0.00870228 0.00875449 0.00875592 0.00878906 0.00892305
- 0.00867987 0.00895619 0.00886059 0.00871706]
-
-mean value: 0.010036587715148926
-
-key: score_time 
-value: [0.01413703 0.0085597  0.00445867 0.00852251 0.00865173 0.00862718
- 0.00861049 0.0086062  0.00850129 0.00863886]
-
-mean value: 0.008731365203857422
-
-key: test_mcc 
-value: [0.33333333 0.21821789        nan 0.         0.6        0.5
- 0.5        0.40824829 0.2        0.6       ]
-
-mean value: nan
-
-key: train_mcc 
-value: [0.71269665 0.64700558 0.53452248 0.68041382 0.62609903 0.58137767
- 0.58137767 0.62609903 0.69509522 0.53452248]
-
-mean value: 0.6219209651318979
-
-key: test_accuracy 
-value: [0.6 0.6 nan 0.5 0.8 0.7 0.7 0.7 0.6 0.8]
-
-mean value: nan
-
-key: train_accuracy 
-value: [0.85555556 0.82222222 0.76666667 0.83333333 0.81111111 0.78888889
- 0.78888889 0.81111111 0.84444444 0.76666667]
-
-mean value: 0.8088888888888889
-
-key: test_fscore 
-value: [0.33333333 0.66666667        nan 0.54545455 0.8        0.57142857
- 0.57142857 0.72727273 0.6        0.8       ]
-
-mean value: nan
-
-key: train_fscore 
-value: [0.85057471 0.81395349 0.75862069 0.81481481 0.8        0.77647059
- 0.77647059 0.8        0.83333333 0.75862069]
-
-mean value: 0.7982858904944852
-
-key: test_precision 
-value: [1.         0.57142857        nan 0.5        0.8        1.
- 1.         0.66666667 0.6        0.8       ]
-
-mean value: nan
-
-key: train_precision 
-value: [0.88095238 0.85365854 0.78571429 0.91666667 0.85       0.825
- 0.825      0.85       0.8974359  0.78571429]
-
-mean value: 0.8470142053068882
-
-key: test_recall 
-value: [0.2 0.8 nan 0.6 0.8 0.4 0.4 0.8 0.6 0.8]
-
-mean value: nan
-
-key: train_recall 
-value: [0.82222222 0.77777778 0.73333333 0.73333333 0.75555556 0.73333333
- 0.73333333 0.75555556 0.77777778 0.73333333]
-
-mean value: 0.7555555555555555
-
-key: test_roc_auc 
-value: [0.6 0.6 nan 0.5 0.8 0.7 0.7 0.7 0.6 0.8]
-
-mean value: nan
-
-key: train_roc_auc 
-value: [0.85555556 0.82222222 0.76666667 0.83333333 0.81111111 0.78888889
- 0.78888889 0.81111111 0.84444444 0.76666667]
-
-mean value: 0.8088888888888889
-
-key: test_jcc 
-value: [0.2        0.5               nan 0.375      0.66666667 0.4
- 0.4        0.57142857 0.42857143 0.66666667]
-
-mean value: nan
-
-key: train_jcc 
-value: [0.74       0.68627451 0.61111111 0.6875     0.66666667 0.63461538
- 0.63461538 0.66666667 0.71428571 0.61111111]
-
-mean value: 0.665284654887596
-
-MCC on Blind test: 0.12
-
-Accuracy on Blind test: 0.6
-
-Model_name: XGBoost 
-Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
-              colsample_bynode=None, colsample_bytree=None,
-              enable_categorical=False, gamma=None, gpu_id=None,
-              importance_type=None, interaction_constraints=None,
-              learning_rate=None, max_delta_step=None, max_depth=None,
-              min_child_weight=None, missing=nan, monotone_constraints=None,
-              n_estimators=100, n_jobs=None, num_parallel_tree=None,
-              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
-              scale_pos_weight=None, subsample=None, tree_method=None,
-              use_label_encoder=False, validate_parameters=None, verbosity=0) 
-List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
-              colsample_bynode=None, colsample_bytree=None,
-              enable_categorical=False, gamma=None, gpu_id=None,
-              importance_type=None, interaction_constraints=None,
-              learning_rate=None, max_delta_step=None, max_depth=None,
-              min_child_weight=None, missing=nan, monotone_constraints=None,
-              n_estimators=100, n_jobs=None, num_parallel_tree=None,
-              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
-              scale_pos_weight=None, subsample=None, tree_method=None,
-              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000...
-                               interaction_constraints=None, learning_rate=None,
-                               max_delta_step=None, max_depth=None,
-                               min_child_weight=None, missing=nan,
-                               monotone_constraints=None, n_estimators=100,
-                               n_jobs=None, num_parallel_tree=None,
-                               predictor=None, random_state=42, reg_alpha=None,
-                               reg_lambda=None, scale_pos_weight=None,
-                               subsample=None, tree_method=None,
-                               use_label_encoder=False,
-                               validate_parameters=None, verbosity=0))])
-
-key: fit_time 
-value: [0.06223869 0.05609274 0.04758739 0.18425703 0.03499866 0.03480005
- 0.05428028 0.03800774 0.06300688 0.03989649]
-
-mean value: 0.06151659488677978
-
-key: score_time 
-value: [0.01010013 0.0104847  0.00464034 0.01078033 0.01093912 0.01051426
- 0.01065564 0.0102036  0.01068449 0.0109179 ]
-
-mean value: 0.009992051124572753
-
-key: test_mcc 
-value: [0.81649658 0.81649658        nan 1.         1.         0.81649658
- 1.         0.6        0.65465367 0.81649658]
-
-mean value: nan
-
-key: train_mcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_accuracy 
-value: [0.9 0.9 nan 1.  1.  0.9 1.  0.8 0.8 0.9]
-
-mean value: nan
-
-key: train_accuracy 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_fscore 
-value: [0.88888889 0.90909091        nan 1.         1.         0.90909091
- 1.         0.8        0.83333333 0.88888889]
-
-mean value: nan
-
-key: train_fscore 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_precision 
-value: [1.         0.83333333        nan 1.         1.         0.83333333
- 1.         0.8        0.71428571 1.        ]
-
-mean value: nan
-
-key: train_precision 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_recall 
-value: [0.8 1.  nan 1.  1.  1.  1.  0.8 1.  0.8]
-
-mean value: nan
-
-key: train_recall 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_roc_auc 
-value: [0.9 0.9 nan 1.  1.  0.9 1.  0.8 0.8 0.9]
-
-mean value: nan
-
-key: train_roc_auc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_jcc 
-value: [0.8        0.83333333        nan 1.         1.         0.83333333
- 1.         0.66666667 0.71428571 0.8       ]
-
-mean value: nan
-
-key: train_jcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
 MCC on Blind test: 0.89
 
 Accuracy on Blind test: 0.95
 
-Model_name: LDA 
-Model func: LinearDiscriminantAnalysis() 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
-              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
-              gamma=0, gpu_id=-1, importance_type=None,
-              interaction_constraints='', learning_rate=0.300000012,
-              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
-              monotone_constraints='()', n_estimators=100, n_jobs=12,
-              num_parallel_tree=1, predictor='auto', random_state=42,
-              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
-              tree_method='exact', use_label_encoder=False,
-              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model', LinearDiscriminantAnalysis())])
-
-key: fit_time 
-value: [0.0236156  0.04939389 0.06203413 0.03593135 0.03797984 0.04009914
- 0.04238033 0.03859186 0.03845286 0.04021764]
-
-mean value: 0.04086966514587402
-
-key: score_time 
-value: [0.02123761 0.0344398  0.0061059  0.0222311  0.02070427 0.02340961
- 0.01904702 0.02245331 0.02260351 0.02361083]
-
-mean value: 0.021584296226501466
-
-key: test_mcc 
-value: [0.         0.81649658        nan 0.81649658 1.         0.40824829
- 0.40824829 0.65465367 0.5        0.81649658]
-
-mean value: nan
-
-key: train_mcc 
-value: [1.         1.         1.         1.         1.         0.97801929
- 1.         1.         1.         1.        ]
-
-mean value: 0.9978019293843652
-
-key: test_accuracy 
-value: [0.5 0.9 nan 0.9 1.  0.7 0.7 0.8 0.7 0.9]
-
-mean value: nan
-
-key: train_accuracy 
-value: [1.         1.         1.         1.         1.         0.98888889
- 1.         1.         1.         1.        ]
-
-mean value: 0.9988888888888889
-
-key: test_fscore 
-value: [0.44444444 0.90909091        nan 0.90909091 1.         0.72727273
- 0.72727273 0.83333333 0.76923077 0.90909091]
-
-mean value: nan
-
-key: train_fscore 
-value: [1.         1.         1.         1.         1.         0.98876404
- 1.         1.         1.         1.        ]
-
-mean value: 0.998876404494382
-
-key: test_precision 
-value: [0.5        0.83333333        nan 0.83333333 1.         0.66666667
- 0.66666667 0.71428571 0.625      0.83333333]
-
-mean value: nan
-
-key: train_precision 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_recall 
-value: [0.4 1.  nan 1.  1.  0.8 0.8 1.  1.  1. ]
-
-mean value: nan
-
-key: train_recall 
-value: [1.         1.         1.         1.         1.         0.97777778
- 1.         1.         1.         1.        ]
-
-mean value: 0.9977777777777778
-
-key: test_roc_auc 
-value: [0.5 0.9 nan 0.9 1.  0.7 0.7 0.8 0.7 0.9]
-
-mean value: nan
-
-key: train_roc_auc 
-value: [1.         1.         1.         1.         1.         0.98888889
- 1.         1.         1.         1.        ]
-
-mean value: 0.9988888888888889
-
-key: test_jcc 
-value: [0.28571429 0.83333333        nan 0.83333333 1.         0.57142857
- 0.57142857 0.71428571 0.625      0.83333333]
-
-mean value: nan
-
-key: train_jcc 
-value: [1.         1.         1.         1.         1.         0.97777778
- 1.         1.         1.         1.        ]
-
-mean value: 0.9977777777777778
-
-MCC on Blind test: 0.01
-
-Accuracy on Blind test: 0.52
-
-Model_name: Multinomial 
-Model func: MultinomialNB() 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
-              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
-              gamma=0, gpu_id=-1, importance_type=None,
-              interaction_constraints='', learning_rate=0.300000012,
-              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
-              monotone_constraints='()', n_estimators=100, n_jobs=12,
-              num_parallel_tree=1, predictor='auto', random_state=42,
-              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
-              tree_method='exact', use_label_encoder=False,
-              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model', MultinomialNB())])
-
-key: fit_time 
-value: [0.01126742 0.00883031 0.00877166 0.00853825 0.00856185 0.00841284
- 0.00843716 0.0084455  0.0083921  0.00846934]
-
-mean value: 0.008812642097473145
-
-key: score_time 
-value: [0.00877571 0.0087781  0.00430918 0.0083077  0.00839949 0.00832129
- 0.00838757 0.0083096  0.00846934 0.00847459]
-
-mean value: 0.008053255081176759
-
-key: test_mcc 
-value: [0.         0.2               nan 0.2        0.81649658 0.65465367
- 0.21821789 0.         0.         0.81649658]
-
-mean value: nan
-
-key: train_mcc 
-value: [0.58137767 0.57792049 0.56056066 0.66683134 0.6        0.56056066
- 0.64700558 0.66683134 0.53990552 0.53665631]
-
-mean value: 0.5937649587083046
-
-key: test_accuracy 
-value: [0.5 0.6 nan 0.6 0.9 0.8 0.6 0.5 0.5 0.9]
-
-mean value: nan
-
-key: train_accuracy 
-value: [0.78888889 0.78888889 0.77777778 0.83333333 0.8        0.77777778
- 0.82222222 0.83333333 0.76666667 0.76666667]
-
-mean value: 0.7955555555555556
-
-key: test_fscore 
-value: [0.54545455 0.6               nan 0.6        0.90909091 0.75
- 0.5        0.44444444 0.61538462 0.90909091]
-
-mean value: nan
-
-key: train_fscore 
-value: [0.77647059 0.78651685 0.76190476 0.83146067 0.8        0.76190476
- 0.81395349 0.83146067 0.74698795 0.75294118]
-
-mean value: 0.7863600930941919
-
-key: test_precision 
-value: [0.5        0.6               nan 0.6        0.83333333 1.
- 0.66666667 0.5        0.5        0.83333333]
-
-mean value: nan
-
-key: train_precision 
-value: [0.825      0.79545455 0.82051282 0.84090909 0.8        0.82051282
- 0.85365854 0.84090909 0.81578947 0.8       ]
-
-mean value: 0.8212746378567944
-
-key: test_recall 
-value: [0.6 0.6 nan 0.6 1.  0.6 0.4 0.4 0.8 1. ]
-
-mean value: nan
-
-key: train_recall 
-value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-[0.73333333 0.77777778 0.71111111 0.82222222 0.8        0.71111111
- 0.77777778 0.82222222 0.68888889 0.71111111]
-
-mean value: 0.7555555555555555
-
-key: test_roc_auc 
-value: [0.5 0.6 nan 0.6 0.9 0.8 0.6 0.5 0.5 0.9]
-
-mean value: nan
-
-key: train_roc_auc 
-value: [0.78888889 0.78888889 0.77777778 0.83333333 0.8        0.77777778
- 0.82222222 0.83333333 0.76666667 0.76666667]
-
-mean value: 0.7955555555555556
-
-key: test_jcc 
-value: [0.375      0.42857143        nan 0.42857143 0.83333333 0.6
- 0.33333333 0.28571429 0.44444444 0.83333333]
-
-mean value: nan
-
-key: train_jcc 
-value: [0.63461538 0.64814815 0.61538462 0.71153846 0.66666667 0.61538462
- 0.68627451 0.71153846 0.59615385 0.60377358]
-
-mean value: 0.6489478294139781
-
-MCC on Blind test: 0.3
-
-Accuracy on Blind test: 0.68
-
-Model_name: Passive Aggresive 
-Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42) 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
-              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
-              gamma=0, gpu_id=-1, importance_type=None,
-              interaction_constraints='', learning_rate=0.300000012,
-              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
-              monotone_constraints='()', n_estimators=100, n_jobs=12,
-              num_parallel_tree=1, predictor='auto', random_state=42,
-              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
-              tree_method='exact', use_label_encoder=False,
-              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model',
-                 PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
-
-key: fit_time 
-value: [0.00969601 0.01305962 0.01228189 0.01330137 0.01364422 0.01439548
- 0.01438236 0.01451087 0.01334405 0.01380467]
-
-mean value: 0.013242053985595702
-
-key: score_time 
-value: [0.0084312  0.01083541 0.0055151  0.01124859 0.01355529 0.0133152
- 0.03446317 0.01134872 0.01132798 0.01131558]
-
-mean value: 0.013135623931884766
-
-key: test_mcc 
-value: [0.         0.65465367        nan 0.6        0.6        0.81649658
- 0.65465367 0.81649658 0.33333333 0.81649658]
-
-mean value: nan
-
-key: train_mcc 
-value: [0.95555556 0.91473203 0.91111111 0.95555556 0.95650071 0.95650071
- 0.81649658 0.87447463 0.79772404 0.85485041]
-
-mean value: 0.8993501347937264
-
-key: test_accuracy 
-value: [0.5 0.8 nan 0.8 0.8 0.9 0.8 0.9 0.6 0.9]
-
-mean value: nan
-
-key: train_accuracy 
-value: [0.97777778 0.95555556 0.95555556 0.97777778 0.97777778 0.97777778
- 0.9        0.93333333 0.88888889 0.92222222]
-
-mean value: 0.9466666666666667
-
-key: test_fscore 
-value: [0.28571429 0.83333333        nan 0.8        0.8        0.88888889
- 0.75       0.88888889 0.71428571 0.88888889]
-
-mean value: nan
-
-key: train_fscore 
-value: [0.97777778 0.95744681 0.95555556 0.97777778 0.97826087 0.97826087
- 0.88888889 0.92857143 0.9        0.91566265]
-
-mean value: 0.9458202626814911
-
-key: test_precision 
-value: [0.5        0.71428571        nan 0.8        0.8        1.
- 1.         1.         0.55555556 1.        ]
-
-mean value: nan
-
-key: train_precision 
-value: [0.97777778 0.91836735 0.95555556 0.97777778 0.95744681 0.95744681
- 1.         1.         0.81818182 1.        ]
-
-mean value: 0.9562553893252982
-
-key: test_recall 
-value: [0.2 1.  nan 0.8 0.8 0.8 0.6 0.8 1.  0.8]
-
-mean value: nan
-
-key: train_recall 
-value: [0.97777778 1.         0.95555556 0.97777778 1.         1.
- 0.8        0.86666667 1.         0.84444444]
-
-mean value: 0.9422222222222222
-
-key: test_roc_auc 
-value: [0.5 0.8 nan 0.8 0.8 0.9 0.8 0.9 0.6 0.9]
-
-mean value: nan
-
-key: train_roc_auc 
-value: [0.97777778 0.95555556 0.95555556 0.97777778 0.97777778 0.97777778
- 0.9        0.93333333 0.88888889 0.92222222]
-
-mean value: 0.9466666666666667
-
-key: test_jcc 
-value: [0.16666667 0.71428571        nan 0.66666667 0.66666667 0.8
- 0.6        0.8        0.55555556 0.8       ]
-
-mean value: nan
-
-key: train_jcc 
-value: [0.95652174 0.91836735 0.91489362 0.95652174 0.95744681 0.95744681
- 0.8        0.86666667 0.81818182 0.84444444]
-
-mean value: 0.8990490988535128
-
-MCC on Blind test: 0.15
-
-Accuracy on Blind test: 0.6
-
-Model_name: Stochastic GDescent 
-Model func: SGDClassifier(n_jobs=10, random_state=42) 
-List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
-              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
-              gamma=0, gpu_id=-1, importance_type=None,
-              interaction_constraints='', learning_rate=0.300000012,
-              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
-              monotone_constraints='()', n_estimators=100, n_jobs=12,
-              num_parallel_tree=1, predictor='auto', random_state=42,
-              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
-              tree_method='exact', use_label_encoder=False,
-              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model', SGDClassifier(n_jobs=10, random_state=42))])
-
-key: fit_time 
-value: [0.01253462 0.01219559 0.01248789 0.01262569 0.01222777 0.01228476
- 0.01211166 0.01249099 0.01256251 0.01269126]
-
-mean value: 0.012421274185180664
-
-key: score_time 
-value: [0.01041532 0.01126981 0.00603032 0.01128006 0.01135278 0.01132679
- 0.01124406 0.01126432 0.01123857 0.0112381 ]
-
-mean value: 0.01066601276397705
-
-key: test_mcc 
-value: [0.         0.5               nan 0.6        0.6        0.65465367
- 0.40824829 0.6        0.6        1.        ]
-
-mean value: nan
-
-key: train_mcc 
-value: [0.95555556 0.74278135 0.97801929 0.89442719 0.97801929 0.46499055
- 0.81649658 0.77919372 0.91473203 1.        ]
-
-mean value: 0.8524215579246943
-
-key: test_accuracy 
-value: [0.5 0.7 nan 0.8 0.8 0.8 0.7 0.8 0.8 1. ]
-
-mean value: nan
-
-key: train_accuracy 
-value: [0.97777778 0.85555556 0.98888889 0.94444444 0.98888889 0.67777778
- 0.9        0.87777778 0.95555556 1.        ]
-
-mean value: 0.9166666666666666
-
-key: test_fscore 
-value: [0.44444444 0.76923077        nan 0.8        0.8        0.83333333
- 0.66666667 0.8        0.8        1.        ]
-
-mean value: nan
-
-key: train_fscore 
-value: [0.97777778 0.87378641 0.98876404 0.94736842 0.98876404 0.75630252
- 0.88888889 0.86075949 0.95348837 1.        ]
-
-mean value: 0.9235899972146242
-
-key: test_precision 
-value: [0.5        0.625             nan 0.8        0.8        0.71428571
- 0.75       0.8        0.8        1.        ]
-
-mean value: nan
-
-key: train_precision 
-value: [0.97777778 0.77586207 1.         0.9        1.         0.60810811
- 1.         1.         1.         1.        ]
-
-mean value: 0.9261747954851403
-
-key: test_recall 
-value: [0.4 1.  nan 0.8 0.8 1.  0.6 0.8 0.8 1. ]
-
-mean value: nan
-
-key: train_recall 
-value: [0.97777778 1.         0.97777778 1.         0.97777778 1.
- 0.8        0.75555556 0.91111111 1.        ]
-
-mean value: 0.94
-
-key: test_roc_auc 
-value: [0.5 0.7 nan 0.8 0.8 0.8 0.7 0.8 0.8 1. ]
-
-mean value: nan
-
-key: train_roc_auc 
-value: [0.97777778 0.85555556 0.98888889 0.94444444 0.98888889 0.67777778
- 0.9        0.87777778 0.95555556 1.        ]
-
-mean value: 0.9166666666666666
-
-key: test_jcc 
-value: [0.28571429 0.625             nan 0.66666667 0.66666667 0.71428571
- 0.5        0.66666667 0.66666667 1.        ]
-
-mean value: nan
-
-key: train_jcc 
-value: [0.95652174 0.77586207 0.97777778 0.9        0.97777778 0.60810811
- 0.8        0.75555556 0.91111111 1.        ]
-
-mean value: 0.8662714138426282
-
-MCC on Blind test: 0.49
-
-Accuracy on Blind test: 0.78
-
-Model_name: AdaBoost Classifier 
-Model func: AdaBoostClassifier(random_state=42) 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
-              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
-              gamma=0, gpu_id=-1, importance_type=None,
-              interaction_constraints='', learning_rate=0.300000012,
-              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
-              monotone_constraints='()', n_estimators=100, n_jobs=12,
-              num_parallel_tree=1, predictor='auto', random_state=42,
-              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
-              tree_method='exact', use_label_encoder=False,
-              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model', AdaBoostClassifier(random_state=42))])
-
-key: fit_time 
-value: [0.0932951  0.08622575 0.08703232 0.08533549 0.08312297 0.08790827
- 0.08446789 0.08704805 0.08473301 0.08655405]
-
-mean value: 0.08657228946685791
-
-key: score_time 
-value: [0.01610136 0.0159595  0.00456142 0.01449895 0.01445603 0.01618457
- 0.01588774 0.01573229 0.01589179 0.01581264]
-
-mean value: 0.014508628845214843
-
-key: test_mcc 
-value: [0.6        0.81649658        nan 0.65465367 1.         0.6
- 1.         1.         0.65465367 0.81649658]
-
-mean value: nan
-
-key: train_mcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_accuracy 
-value: [0.8 0.9 nan 0.8 1.  0.8 1.  1.  0.8 0.9]
-
-mean value: nan
-
-key: train_accuracy 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_fscore 
-value: [0.8        0.90909091        nan 0.75       1.         0.8
- 1.         1.         0.83333333 0.88888889]
-
-mean value: nan
-
-key: train_fscore 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_precision 
-value: [0.8        0.83333333        nan 1.         1.         0.8
- 1.         1.         0.71428571 1.        ]
-
-mean value: nan
-
-key: train_precision 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_recall 
-value: [0.8 1.  nan 0.6 1.  0.8 1.  1.  1.  0.8]
-
-mean value: nan
-
-key: train_recall 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_roc_auc 
-value: [0.8 0.9 nan 0.8 1.  0.8 1.  1.  0.8 0.9]
-
-mean value: nan
-
-key: train_roc_auc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_jcc 
-value: [0.66666667 0.83333333        nan 0.6        1.         0.66666667
- 1.         1.         0.71428571 0.8       ]
-
-mean value: nan
-
-key: train_jcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-MCC on Blind test: 0.78
-
-Accuracy on Blind test: 0.9
-
-Model_name: Bagging Classifier 
-Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42) 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
-              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
-              gamma=0, gpu_id=-1, importance_type=None,
-              interaction_constraints='', learning_rate=0.300000012,
-              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
-              monotone_constraints='()', n_estimators=100, n_jobs=12,
-              num_parallel_tree=1, predictor='auto', random_state=42,
-              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
-              tree_method='exact', use_label_encoder=False,
-              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model',
-                 BaggingClassifier(n_jobs=10, oob_score=True,
-                                   random_state=42))])
-
-key: fit_time 
-value: [0.03166676 0.04806876 0.04904318 0.03325343 0.03756118 0.02605057
- 0.051754   0.03409243 0.02555013 0.03323889]
-
-mean value: 0.037027931213378905
-
-key: score_time 
-value: [0.02363086 0.02686834 0.00466347 0.02329421 0.02710223 0.02057576
- 0.02632928 0.01891589 0.01708174 0.02512145]
-
-mean value: 0.021358323097229005
-
-key: test_mcc 
-value: [0.81649658 0.81649658        nan 0.81649658 1.         0.6
- 1.         0.65465367 0.81649658 0.81649658]
-
-mean value: nan
-
-key: train_mcc 
-value: [0.97801929 0.97801929 1.         1.         1.         1.
- 0.97801929 1.         1.         1.        ]
-
-mean value: 0.9934057881530954
-
-key: test_accuracy 
-value: [0.9 0.9 nan 0.9 1.  0.8 1.  0.8 0.9 0.9]
-
-mean value: nan
-
-key: train_accuracy 
-value: [0.98888889 0.98888889 1.         1.         1.         1.
- 0.98888889 1.         1.         1.        ]
-
-mean value: 0.9966666666666667
-
-key: test_fscore 
-value: [0.88888889 0.90909091        nan 0.90909091 1.         0.8
- 1.         0.83333333 0.90909091 0.88888889]
-
-mean value: nan
-
-key: train_fscore 
-value: [0.98876404 0.98876404 1.         1.         1.         1.
- 0.98876404 1.         1.         1.        ]
-
-mean value: 0.996629213483146
-
-key: test_precision 
-value: [1.         0.83333333        nan 0.83333333 1.         0.8
- 1.         0.71428571 0.83333333 1.        ]
-
-mean value: nan
-
-key: train_precision 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_recall 
-value: [0.8 1.  nan 1.  1.  0.8 1.  1.  1.  0.8]
-
-mean value: nan
-
-key: train_recall 
-value: [0.97777778 0.97777778 1.         1.         1.         1.
- 0.97777778 1.         1.         1.        ]
-
-mean value: 0.9933333333333333
-
-key: test_roc_auc 
-value: [0.9 0.9 nan 0.9 1.  0.8 1.  0.8 0.9 0.9]
-
-mean value: nan
-
-key: train_roc_auc 
-value: [0.98888889 0.98888889 1.         1.         1.         1.
- 0.98888889 1.         1.         1.        ]
-
-mean value: 0.9966666666666667
-
-key: test_jcc 
-value: [0.8        0.83333333        nan 0.83333333 1.         0.66666667
- 1.         0.71428571 0.83333333 0.8       ]
-
-mean value: nan
-
-key: train_jcc 
-value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-[0.97777778 0.97777778 1.         1.         1.         1.
- 0.97777778 1.         1.         1.        ]
-
-mean value: 0.9933333333333333
-
-MCC on Blind test: 0.95
-
-Accuracy on Blind test: 0.98
-
-Model_name: Gaussian Process 
-Model func: GaussianProcessClassifier(random_state=42) 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
-              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
-              gamma=0, gpu_id=-1, importance_type=None,
-              interaction_constraints='', learning_rate=0.300000012,
-              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
-              monotone_constraints='()', n_estimators=100, n_jobs=12,
-              num_parallel_tree=1, predictor='auto', random_state=42,
-              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
-              tree_method='exact', use_label_encoder=False,
-              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model', GaussianProcessClassifier(random_state=42))])
-
-key: fit_time 
-value: [0.01846266 0.01602125 0.01668453 0.02254128 0.01823306 0.01637578
- 0.01689363 0.01705527 0.01624489 0.01630282]
-
-mean value: 0.017481517791748048
-
-key: score_time 
-value: [0.01143336 0.01123261 0.00608587 0.01233387 0.01204014 0.01180053
- 0.011935   0.01179743 0.01177883 0.01175404]
-
-mean value: 0.011219167709350586
-
-key: test_mcc 
-value: [0.         0.6               nan 0.2        0.81649658 0.6
- 0.65465367 0.5        0.33333333 1.        ]
-
-mean value: nan
-
-key: train_mcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_accuracy 
-value: [0.5 0.8 nan 0.6 0.9 0.8 0.8 0.7 0.6 1. ]
-
-mean value: nan
-
-key: train_accuracy 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_fscore 
-value: [0.28571429 0.8               nan 0.6        0.90909091 0.8
- 0.83333333 0.76923077 0.71428571 1.        ]
-
-mean value: nan
-
-key: train_fscore 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_precision 
-value: [0.5        0.8               nan 0.6        0.83333333 0.8
- 0.71428571 0.625      0.55555556 1.        ]
-
-mean value: nan
-
-key: train_precision 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_recall 
-value: [0.2 0.8 nan 0.6 1.  0.8 1.  1.  1.  1. ]
-
-mean value: nan
-
-key: train_recall 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_roc_auc 
-value: [0.5 0.8 nan 0.6 0.9 0.8 0.8 0.7 0.6 1. ]
-
-mean value: nan
-
-key: train_roc_auc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_jcc 
-value: [0.16666667 0.66666667        nan 0.42857143 0.83333333 0.66666667
- 0.71428571 0.625      0.55555556 1.        ]
-
-mean value: nan
-
-key: train_jcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-MCC on Blind test: 0.16
-
-Accuracy on Blind test: 0.62
-
-Model_name: Gradient Boosting 
-Model func: GradientBoostingClassifier(random_state=42) 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
-              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
-              gamma=0, gpu_id=-1, importance_type=None,
-              interaction_constraints='', learning_rate=0.300000012,
-              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
-              monotone_constraints='()', n_estimators=100, n_jobs=12,
-              num_parallel_tree=1, predictor='auto', random_state=42,
-              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
-              tree_method='exact', use_label_encoder=False,
-              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
-  warnings.warn("Variables are collinear")
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
-  warnings.warn("Variables are collinear")
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
-  warnings.warn("Variables are collinear")
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
-  warnings.warn("Variables are collinear")
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
-  warnings.warn("Variables are collinear")
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
-  warnings.warn("Variables are collinear")
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
-  warnings.warn("Variables are collinear")
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
-  warnings.warn("Variables are collinear")
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
-  warnings.warn("Variables are collinear")
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
-  warnings.warn("Variables are collinear")
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
-  warnings.warn("Variables are collinear")
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
-  _warn_prf(average, modifier, msg_start, len(result))
-Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model', GradientBoostingClassifier(random_state=42))])
-
-key: fit_time 
-value: [0.17908907 0.17281628 0.17472339 0.16933966 0.17832685 0.17855334
- 0.17665958 0.20065045 0.17976284 0.14105153]
-
-mean value: 0.17509729862213136
-
-key: score_time 
-value: [0.00950193 0.00909305 0.00476193 0.00946689 0.00997877 0.0098536
- 0.00984955 0.00980544 0.00993419 0.00911093]
-
-mean value: 0.009135627746582031
-
-key: test_mcc 
-value: [0.81649658 0.81649658        nan 0.81649658 1.         0.6
- 1.         1.         0.81649658 0.81649658]
-
-mean value: nan
-
-key: train_mcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_accuracy 
-value: [0.9 0.9 nan 0.9 1.  0.8 1.  1.  0.9 0.9]
-
-mean value: nan
-
-key: train_accuracy 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_fscore 
-value: [0.88888889 0.90909091        nan 0.90909091 1.         0.8
- 1.         1.         0.90909091 0.88888889]
-
-mean value: nan
-
-key: train_fscore 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_precision 
-value: [1.         0.83333333        nan 0.83333333 1.         0.8
- 1.         1.         0.83333333 1.        ]
-
-mean value: nan
-
-key: train_precision 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_recall 
-value: [0.8 1.  nan 1.  1.  0.8 1.  1.  1.  0.8]
-
-mean value: nan
-
-key: train_recall 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_roc_auc 
-value: [0.9 0.9 nan 0.9 1.  0.8 1.  1.  0.9 0.9]
-
-mean value: nan
-
-key: train_roc_auc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_jcc 
-value: [0.8        0.83333333        nan 0.83333333 1.         0.66666667
- 1.         1.         0.83333333 0.8       ]
-
-mean value: nan
-
-key: train_jcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-MCC on Blind test: 0.95
-
-Accuracy on Blind test: 0.98
-
-Model_name: QDA 
-Model func: QuadraticDiscriminantAnalysis() 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
-              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
-              gamma=0, gpu_id=-1, importance_type=None,
-              interaction_constraints='', learning_rate=0.300000012,
-              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
-              monotone_constraints='()', n_estimators=100, n_jobs=12,
-              num_parallel_tree=1, predictor='auto', random_state=42,
-              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
-              tree_method='exact', use_label_encoder=False,
-              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model', QuadraticDiscriminantAnalysis())])
-
-key: fit_time 
-value: [0.01081228 0.01397157 0.01533651 0.01416707 0.01431513 0.01669645
- 0.0199163  0.01420426 0.01628375 0.01877785]
-
-mean value: 0.01544811725616455
-
-key: score_time 
-value: [0.01137567 0.01164627 0.00611782 0.01310349 0.01270056 0.01162291
- 0.01324058 0.01273084 0.01166701 0.01337457]
-
-mean value: 0.011757969856262207
-
-key: test_mcc 
-value: [0.33333333 0.81649658        nan 0.5        0.81649658 0.81649658
- 0.81649658 0.65465367 1.         0.81649658]
-
-mean value: nan
-
-key: train_mcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_accuracy 
-value: [0.6 0.9 nan 0.7 0.9 0.9 0.9 0.8 1.  0.9]
-
-mean value: nan
-
-key: train_accuracy 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_fscore 
-value: [0.33333333 0.88888889        nan 0.57142857 0.88888889 0.88888889
- 0.88888889 0.75       1.         0.88888889]
-
-mean value: nan
-
-key: train_fscore 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_precision 
-value: [ 1.  1. nan  1.  1.  1.  1.  1.  1.  1.]
-
-mean value: nan
-
-key: train_precision 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_recall 
-value: [0.2 0.8 nan 0.4 0.8 0.8 0.8 0.6 1.  0.8]
-
-mean value: nan
-
-key: train_recall 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_roc_auc 
-value: [0.6 0.9 nan 0.7 0.9 0.9 0.9 0.8 1.  0.9]
-
-mean value: nan
-
-key: train_roc_auc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_jcc 
-value: [0.2 0.8 nan 0.4 0.8 0.8 0.8 0.6 1.  0.8]
-
-mean value: nan
-
-key: train_jcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-MCC on Blind test: 0.0
-
-Accuracy on Blind test: 0.65
-
-Model_name: Ridge Classifier 
-Model func: RidgeClassifier(random_state=42) 
-List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
-              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
-              gamma=0, gpu_id=-1, importance_type=None,
-              interaction_constraints='', learning_rate=0.300000012,
-              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
-              monotone_constraints='()', n_estimators=100, n_jobs=12,
-              num_parallel_tree=1, predictor='auto', random_state=42,
-              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
-              tree_method='exact', use_label_encoder=False,
-              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model', RidgeClassifier(random_state=42))])
-
-key: fit_time 
-value: [0.03344846 0.03200626 0.032161   0.03211999 0.0323348  0.03212428
- 0.03187537 0.03182149 0.0317018  0.03172779]
-
-mean value: 0.03213212490081787
-
-key: score_time 
-value: [0.01959944 0.01622725 0.01191449 0.02088118 0.0222888  0.01162863
- 0.02132607 0.01181674 0.02234721 0.02092266]
-
-mean value: 0.01789524555206299
-
-key: test_mcc 
-value: [0.2        0.65465367        nan 0.65465367 0.81649658 0.81649658
- 0.81649658 0.65465367 0.65465367 0.81649658]
-
-mean value: nan
-
-key: train_mcc 
-value: [1.         0.97801929 0.97801929 0.97801929 0.97801929 1.
- 0.97801929 0.97801929 1.         0.97801929]
-
-mean value: 0.9846135056905561
-
-key: test_accuracy 
-value: [0.6 0.8 nan 0.8 0.9 0.9 0.9 0.8 0.8 0.9]
-
-mean value: nan
-
-key: train_accuracy 
-value: [1.         0.98888889 0.98888889 0.98888889 0.98888889 1.
- 0.98888889 0.98888889 1.         0.98888889]
-
-mean value: 0.9922222222222222
-
-key: test_fscore 
-value: [0.6        0.83333333        nan 0.83333333 0.90909091 0.88888889
- 0.90909091 0.83333333 0.83333333 0.88888889]
-
-mean value: nan
-
-key: train_fscore 
-value: [1.         0.98901099 0.98901099 0.98901099 0.98901099 1.
- 0.98901099 0.98901099 1.         0.98901099]
-
-mean value: 0.9923076923076923
-
-key: test_precision 
-value: [0.6        0.71428571        nan 0.71428571 0.83333333 1.
- 0.83333333 0.71428571 0.71428571 1.        ]
-
-mean value: nan
-
-key: train_precision 
-value: [1.         0.97826087 0.97826087 0.97826087 0.97826087 1.
- 0.97826087 0.97826087 1.         0.97826087]
-
-mean value: 0.9847826086956522
-
-key: test_recall 
-value: [0.6 1.  nan 1.  1.  0.8 1.  1.  1.  0.8]
-
-mean value: nan
-
-key: train_recall 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_roc_auc 
-value: [0.6 0.8 nan 0.8 0.9 0.9 0.9 0.8 0.8 0.9]
-
-mean value: nan
-
-key: train_roc_auc 
-value: [1.         0.98888889 0.98888889 0.98888889 0.98888889 1.
- 0.98888889 0.98888889 1.         0.98888889]
-
-mean value: 0.9922222222222221
-
-key: test_jcc 
-value: [0.42857143 0.71428571        nan 0.71428571 0.83333333 0.8
- 0.83333333 0.71428571 0.71428571 0.8       ]
-
-mean value: nan
-
-key: train_jcc 
-value: [1.         0.97826087 0.97826087 0.97826087 0.97826087 1.
- 0.97826087 0.97826087 1.         0.97826087]
-
-mean value: 0.9847826086956522
-
-MCC on Blind test: 0.48
-
-Accuracy on Blind test: 0.78
-
-Model_name: Ridge ClassifierCV 
-Model func: RidgeClassifierCV(cv=10) 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
-              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
-              gamma=0, gpu_id=-1, importance_type=None,
-              interaction_constraints='', learning_rate=0.300000012,
-              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
-              monotone_constraints='()', n_estimators=100, n_jobs=12,
-              num_parallel_tree=1, predictor='auto', random_state=42,
-              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
-              tree_method='exact', use_label_encoder=False,
-              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:148: SettingWithCopyWarning: 
-A value is trying to be set on a copy of a slice from a DataFrame
-
-See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
-  ros_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
-/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:151: SettingWithCopyWarning: 
-A value is trying to be set on a copy of a slice from a DataFrame
-
-See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
-  ros_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
-  warnings.warn(
-Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model', RidgeClassifierCV(cv=10))])
-
-key: fit_time 
-value: [0.21318793 0.09895921 0.21072364 0.19039989 0.18606758 0.18941188
- 0.18936801 0.20299673 0.19019341 0.22260165]
-
-mean value: 0.18939099311828614
-
-key: score_time 
-value: [0.02227712 0.01179457 0.01261592 0.02156162 0.02067518 0.02020216
- 0.02313948 0.0224812  0.02181292 0.02340198]
-
-mean value: 0.019996213912963866
-
-key: test_mcc 
-value: [0.2        0.65465367        nan 0.65465367 0.81649658 0.81649658
- 0.81649658 0.65465367 0.65465367 0.81649658]
-
-mean value: nan
-
-key: train_mcc 
-value: [1.         0.97801929 0.97801929 0.97801929 0.97801929 1.
- 0.97801929 0.97801929 1.         0.97801929]
-
-mean value: 0.9846135056905561
-
-key: test_accuracy 
-value: [0.6 0.8 nan 0.8 0.9 0.9 0.9 0.8 0.8 0.9]
-
-mean value: nan
-
-key: train_accuracy 
-value: [1.         0.98888889 0.98888889 0.98888889 0.98888889 1.
- 0.98888889 0.98888889 1.         0.98888889]
-
-mean value: 0.9922222222222222
-
-key: test_fscore 
-value: [0.6        0.83333333        nan 0.83333333 0.90909091 0.88888889
- 0.90909091 0.83333333 0.83333333 0.88888889]
-
-mean value: nan
-
-key: train_fscore 
-value: [1.         0.98901099 0.98901099 0.98901099 0.98901099 1.
- 0.98901099 0.98901099 1.         0.98901099]
-
-mean value: 0.9923076923076923
-
-key: test_precision 
-value: [0.6        0.71428571        nan 0.71428571 0.83333333 1.
- 0.83333333 0.71428571 0.71428571 1.        ]
-
-mean value: nan
-
-key: train_precision 
-value: [1.         0.97826087 0.97826087 0.97826087 0.97826087 1.
- 0.97826087 0.97826087 1.         0.97826087]
-
-mean value: 0.9847826086956522
-
-key: test_recall 
-value: [0.6 1.  nan 1.  1.  0.8 1.  1.  1.  0.8]
-
-mean value: nan
-
-key: train_recall 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_roc_auc 
-value: [0.6 0.8 nan 0.8 0.9 0.9 0.9 0.8 0.8 0.9]
-
-mean value: nan
-
-key: train_roc_auc 
-value: [1.         0.98888889 0.98888889 0.98888889 0.98888889 1.
- 0.98888889 0.98888889 1.         0.98888889]
-
-mean value: 0.9922222222222221
-
-key: test_jcc 
-value: [0.42857143 0.71428571        nan 0.71428571 0.83333333 0.8
- 0.83333333 0.71428571 0.71428571 0.8       ]
-
-mean value: nan
-
-key: train_jcc 
-value: [1.         0.97826087 0.97826087 0.97826087 0.97826087 1.
- 0.97826087 0.97826087 1.         0.97826087]
-
-mean value: 0.9847826086956522
-
-MCC on Blind test: 0.48
-
-Accuracy on Blind test: 0.78
-
-Model_name: Logistic Regression 
-Model func: LogisticRegression(random_state=42) 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
-              colsample_bynode=None, colsample_bytree=None,
-              enable_categorical=False, gamma=None, gpu_id=None,
-              importance_type=None, interaction_constraints=None,
-              learning_rate=None, max_delta_step=None, max_depth=None,
-              min_child_weight=None, missing=nan, monotone_constraints=None,
-              n_estimators=100, n_jobs=None, num_parallel_tree=None,
-              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
-              scale_pos_weight=None, subsample=None, tree_method=None,
-              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model', LogisticRegression(random_state=42))])
-
-key: fit_time 
-value: [0.0250783  0.03367138 0.02173638 0.03091216 0.0839653  0.04441166
- 0.03470874 0.0207479  0.02312613 0.02219653]
-
-mean value: 0.034055447578430174
-
-key: score_time 
-value: [0.01159692 0.01183295 0.01154208 0.0117619  0.01320601 0.00625491
- 0.01170278 0.01160264 0.0115304  0.01158214]
-
-mean value: 0.011261272430419921
-
-key: test_mcc 
-value: [ 0.33333333  0.70710678  0.4472136   1.                 nan         nan
- -0.33333333  1.          1.          1.        ]
-
-mean value: nan
-
-key: train_mcc 
-value: [0.96225045 1.         1.         1.         0.96225045 1.
- 1.         1.         1.         1.        ]
-
-mean value: 0.9924500897298753
-
-key: test_accuracy 
-value: [0.66666667 0.83333333 0.66666667 1.                nan        nan
- 0.33333333 1.         1.         1.        ]
-
-mean value: nan
-
-key: train_accuracy 
-value: [0.98076923 1.         1.         1.         0.98076923 1.
- 1.         1.         1.         1.        ]
-
-mean value: 0.9961538461538462
-
-key: test_fscore 
-value: [0.66666667 0.85714286 0.75       1.                nan        nan
- 0.33333333 1.         1.         1.        ]
-
-mean value: nan
-
-key: train_fscore 
-value: [0.98039216 1.         1.         1.         0.98039216 1.
- 1.         1.         1.         1.        ]
-
-mean value: 0.996078431372549
-
-key: test_precision 
-value: [0.66666667 0.75       0.6        1.                nan        nan
- 0.33333333 1.         1.         1.        ]
-
-mean value: nan
-
-key: train_precision 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_recall 
-value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-[0.66666667 1.         1.         1.                nan        nan
- 0.33333333 1.         1.         1.        ]
-
-mean value: nan
-
-key: train_recall 
-value: [0.96153846 1.         1.         1.         0.96153846 1.
- 1.         1.         1.         1.        ]
-
-mean value: 0.9923076923076923
-
-key: test_roc_auc 
-value: [0.66666667 0.83333333 0.66666667 1.                nan        nan
- 0.33333333 1.         1.         1.        ]
-
-mean value: nan
-
-key: train_roc_auc 
-value: [0.98076923 1.         1.         1.         0.98076923 1.
- 1.         1.         1.         1.        ]
-
-mean value: 0.9961538461538462
-
-key: test_jcc 
-value: [0.5  0.75 0.6  1.    nan  nan 0.2  1.   1.   1.  ]
-
-mean value: nan
-
-key: train_jcc 
-value: [0.96153846 1.         1.         1.         0.96153846 1.
- 1.         1.         1.         1.        ]
-
-mean value: 0.9923076923076923
-
-MCC on Blind test: 0.21
-
-Accuracy on Blind test: 0.6
-
-Model_name: Logistic RegressionCV 
-Model func: LogisticRegressionCV(random_state=42) 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
-              colsample_bynode=None, colsample_bytree=None,
-              enable_categorical=False, gamma=None, gpu_id=None,
-              importance_type=None, interaction_constraints=None,
-              learning_rate=None, max_delta_step=None, max_depth=None,
-              min_child_weight=None, missing=nan, monotone_constraints=None,
-              n_estimators=100, n_jobs=None, num_parallel_tree=None,
-              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
-              scale_pos_weight=None, subsample=None, tree_method=None,
-              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model', LogisticRegressionCV(random_state=42))])
-
-key: fit_time 
-value: [0.27737689 0.28079844 0.29659295 0.26077461 0.26526332 0.26656747
- 0.26323867 0.28429699 0.31050968 0.30310488]
-
-mean value: 0.28085238933563234
-
-key: score_time 
-value: [0.01181126 0.01163769 0.01170397 0.0118041  0.00647473 0.00621986
- 0.01164699 0.01164746 0.0117209  0.01170206]
-
-mean value: 0.01063690185546875
-
-key: test_mcc 
-value: [0.33333333 0.70710678 0.4472136  1.                nan        nan
- 0.70710678 1.         1.         1.        ]
-
-mean value: nan
-
-key: train_mcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_accuracy 
-value: [0.66666667 0.83333333 0.66666667 1.                nan        nan
- 0.83333333 1.         1.         1.        ]
-
-mean value: nan
-
-key: train_accuracy 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_fscore 
-value: [0.66666667 0.85714286 0.75       1.                nan        nan
- 0.8        1.         1.         1.        ]
-
-mean value: nan
-
-key: train_fscore 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_precision 
-value: [0.66666667 0.75       0.6        1.                nan        nan
- 1.         1.         1.         1.        ]
-
-mean value: nan
-
-key: train_precision 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_recall 
-value: [0.66666667 1.         1.         1.                nan        nan
- 0.66666667 1.         1.         1.        ]
-
-mean value: nan
-
-key: train_recall 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_roc_auc 
-value: [0.66666667 0.83333333 0.66666667 1.                nan        nan
- 0.83333333 1.         1.         1.        ]
-
-mean value: nan
-
-key: train_roc_auc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_jcc 
-value: [0.5        0.75       0.6        1.                nan        nan
- 0.66666667 1.         1.         1.        ]
-
-mean value: nan
-
-key: train_jcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-MCC on Blind test: 0.25
-
-Accuracy on Blind test: 0.62
-
-Model_name: Gaussian NB 
-Model func: GaussianNB() 
-List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
-  warnings.warn(
-[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
-              colsample_bynode=None, colsample_bytree=None,
-              enable_categorical=False, gamma=None, gpu_id=None,
-              importance_type=None, interaction_constraints=None,
-              learning_rate=None, max_delta_step=None, max_depth=None,
-              min_child_weight=None, missing=nan, monotone_constraints=None,
-              n_estimators=100, n_jobs=None, num_parallel_tree=None,
-              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
-              scale_pos_weight=None, subsample=None, tree_method=None,
-              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model', GaussianNB())])
-
-key: fit_time 
-value: [0.01206636 0.0112257  0.00846434 0.0084424  0.01119018 0.01600456
- 0.0084219  0.00850463 0.00825167 0.00816083]
-
-mean value: 0.010073256492614747
-
-key: score_time 
-value: [0.01196933 0.00882673 0.00861526 0.01163769 0.00628543 0.00520444
- 0.00848961 0.00851965 0.00831652 0.00836253]
-
-mean value: 0.00862271785736084
-
-key: test_mcc 
-value: [-0.4472136   0.33333333  0.33333333  0.70710678         nan         nan
- -0.4472136   0.          0.16666667 -0.16666667]
-
-mean value: nan
-
-key: train_mcc 
-value: [0.85634884 0.80829038 0.84866842 0.89056356 0.70064905 0.82305489
- 0.77151675 0.77151675 0.53088871 0.81196581]
-
-mean value: 0.7813463156135712
-
-key: test_accuracy 
-value: [0.33333333 0.66666667 0.66666667 0.83333333        nan        nan
- 0.33333333 0.5        0.6        0.4       ]
-
-mean value: nan
-
-key: train_accuracy 
-value: [0.92307692 0.90384615 0.92307692 0.94230769 0.84615385 0.90384615
- 0.88461538 0.88461538 0.71698113 0.90566038]
-
-mean value: 0.8834179970972423
-
-key: test_fscore 
-value: [0.         0.66666667 0.66666667 0.8               nan        nan
- 0.5        0.4        0.5        0.4       ]
-
-mean value: nan
-
-key: train_fscore 
-value: [0.91666667 0.90196078 0.92       0.93877551 0.83333333 0.9122807
- 0.88       0.88       0.61538462 0.90566038]
-
-mean value: 0.8704061989015298
-
-key: test_precision 
-value: [0.         0.66666667 0.66666667 1.                nan        nan
- 0.4        0.5        0.5        0.5       ]
-
-mean value: nan
-
-key: train_precision 
-value: [1.         0.92       0.95833333 1.         0.90909091 0.83870968
- 0.91666667 0.91666667 1.         0.88888889]
-
-mean value: 0.9348356142065819
-
-key: test_recall 
-value: [0.         0.66666667 0.66666667 0.66666667        nan        nan
- 0.66666667 0.33333333 0.5        0.33333333]
-
-mean value: nan
-
-key: train_recall 
-value: [0.84615385 0.88461538 0.88461538 0.88461538 0.76923077 1.
- 0.84615385 0.84615385 0.44444444 0.92307692]
-
-mean value: 0.832905982905983
-
-key: test_roc_auc 
-value: [0.33333333 0.66666667 0.66666667 0.83333333        nan        nan
- 0.33333333 0.5        0.58333333 0.41666667]
-
-mean value: nan
-
-key: train_roc_auc 
-value: [0.92307692 0.90384615 0.92307692 0.94230769 0.84615385 0.90384615
- 0.88461538 0.88461538 0.72222222 0.90598291]
-
-mean value: 0.883974358974359
-
-key: test_jcc 
-value: [0.         0.5        0.5        0.66666667        nan        nan
- 0.33333333 0.25       0.33333333 0.25      ]
-
-mean value: nan
-
-key: train_jcc 
-value: [0.84615385 0.82142857 0.85185185 0.88461538 0.71428571 0.83870968
- 0.78571429 0.78571429 0.44444444 0.82758621]
-
-mean value: 0.7800504268524291
-
-MCC on Blind test: -0.07
-
-Accuracy on Blind test: 0.48
-
-Model_name: Naive Bayes 
-Model func: BernoulliNB() 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
-              colsample_bynode=None, colsample_bytree=None,
-              enable_categorical=False, gamma=None, gpu_id=None,
-              importance_type=None, interaction_constraints=None,
-              learning_rate=None, max_delta_step=None, max_depth=None,
-              min_child_weight=None, missing=nan, monotone_constraints=None,
-              n_estimators=100, n_jobs=None, num_parallel_tree=None,
-              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
-              scale_pos_weight=None, subsample=None, tree_method=None,
-              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
-  _warn_prf(average, modifier, msg_start, len(result))
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
-  warnings.warn(
-Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model', BernoulliNB())])
-
-key: fit_time 
-value: [0.00878549 0.00863194 0.00832844 0.00831485 0.00832462 0.00855541
- 0.00872874 0.00837517 0.00835276 0.00829864]
-
-mean value: 0.008469605445861816
-
-key: score_time 
-value: [0.00858736 0.00840878 0.00837111 0.00836158 0.00419855 0.00429416
- 0.00853777 0.00835299 0.00840139 0.00841165]
-
-mean value: 0.007592535018920899
-
-key: test_mcc 
-value: [ 0.          0.          0.33333333  0.                 nan         nan
-  0.70710678  0.70710678 -0.40824829  0.61237244]
-
-mean value: nan
-
-key: train_mcc 
-value: [0.69436507 0.71151247 0.70064905 0.77849894 0.77151675 0.81312325
- 0.80829038 0.77151675 0.71778392 0.77540056]
-
-mean value: 0.7542657149596319
-
-key: test_accuracy 
-value: [0.5        0.5        0.66666667 0.5               nan        nan
- 0.83333333 0.83333333 0.4        0.8       ]
-
-mean value: nan
-
-key: train_accuracy 
-value: [0.84615385 0.84615385 0.84615385 0.88461538 0.88461538 0.90384615
- 0.90384615 0.88461538 0.8490566  0.88679245]
-
-mean value: 0.8735849056603774
-
-key: test_fscore 
-value: [0.4        0.4        0.66666667 0.4               nan        nan
- 0.85714286 0.8        0.         0.85714286]
-
-mean value: nan
-
-key: train_fscore 
-value: [0.84       0.82608696 0.83333333 0.875      0.88888889 0.89795918
- 0.90196078 0.88       0.83333333 0.88      ]
-
-mean value: 0.865656248006449
-
-key: test_precision 
-value: [0.5        0.5        0.66666667 0.5               nan        nan
- 0.75       1.         0.         0.75      ]
-
-mean value: nan
-
-key: train_precision 
-value: [0.875      0.95       0.90909091 0.95454545 0.85714286 0.95652174
- 0.92       0.91666667 0.95238095 0.91666667]
-
-mean value: 0.9208015245623942
-
-key: test_recall 
-value: [0.33333333 0.33333333 0.66666667 0.33333333        nan        nan
- 1.         0.66666667 0.         1.        ]
-
-mean value: nan
-
-key: train_recall 
-value: [0.80769231 0.73076923 0.76923077 0.80769231 0.92307692 0.84615385
- 0.88461538 0.84615385 0.74074074 0.84615385]
-
-mean value: 0.8202279202279202
-
-key: test_roc_auc 
-value: [0.5        0.5        0.66666667 0.5               nan        nan
- 0.83333333 0.83333333 0.33333333 0.75      ]
-
-mean value: nan
-
-key: train_roc_auc 
-value: [0.84615385 0.84615385 0.84615385 0.88461538 0.88461538 0.90384615
- 0.90384615 0.88461538 0.8511396  0.88603989]
-
-mean value: 0.8737179487179488
-
-key: test_jcc 
-value: [0.25       0.25       0.5        0.25              nan        nan
- 0.75       0.66666667 0.         0.75      ]
-
-mean value: nan
-
-key: train_jcc 
-value: [0.72413793 0.7037037  0.71428571 0.77777778 0.8        0.81481481
- 0.82142857 0.78571429 0.71428571 0.78571429]
-
-mean value: 0.764186279875935
-
-MCC on Blind test: -0.03
-
-Accuracy on Blind test: 0.5
-
-Model_name: K-Nearest Neighbors 
-Model func: KNeighborsClassifier() 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
-              colsample_bynode=None, colsample_bytree=None,
-              enable_categorical=False, gamma=None, gpu_id=None,
-              importance_type=None, interaction_constraints=None,
-              learning_rate=None, max_delta_step=None, max_depth=None,
-              min_child_weight=None, missing=nan, monotone_constraints=None,
-              n_estimators=100, n_jobs=None, num_parallel_tree=None,
-              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
-              scale_pos_weight=None, subsample=None, tree_method=None,
-              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model', KNeighborsClassifier())])
-
-key: fit_time 
-value: [0.00827241 0.00880098 0.00887871 0.00861597 0.00911736 0.00877094
- 0.00847507 0.00913715 0.00888658 0.00889826]
-
-mean value: 0.008785343170166016
-
-key: score_time 
-value: [0.00981283 0.0094254  0.00966692 0.00966859 0.0045979  0.00435901
- 0.00986171 0.009624   0.00968266 0.0103004 ]
-
-mean value: 0.008699941635131835
-
-key: test_mcc 
-value: [ 0.4472136   0.33333333  0.70710678  0.                 nan         nan
- -0.4472136   1.          0.61237244  0.61237244]
-
-mean value: nan
-
-key: train_mcc 
-value: [0.58333333 0.55339859 0.58080232 0.71151247 0.58789635 0.71151247
- 0.58789635 0.65824263 0.53035501 0.57140596]
-
-mean value: 0.6076355494357508
-
-key: test_accuracy 
-value: [0.66666667 0.66666667 0.83333333 0.5               nan        nan
- 0.33333333 1.         0.8        0.8       ]
-
-mean value: nan
-
-key: train_accuracy 
-value: [0.76923077 0.76923077 0.78846154 0.84615385 0.78846154 0.84615385
- 0.78846154 0.82692308 0.75471698 0.77358491]
-
-mean value: 0.7951378809869376
-
-key: test_fscore 
-value: [0.5        0.66666667 0.85714286 0.                nan        nan
- 0.5        1.         0.66666667 0.85714286]
-
-mean value: nan
-
-key: train_fscore 
-value: [0.71428571 0.73913043 0.7755102  0.82608696 0.76595745 0.82608696
- 0.76595745 0.81632653 0.72340426 0.72727273]
-
-mean value: 0.7680018673014577
-
-key: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
-  warnings.warn(
-test_precision 
-value: [1.         0.66666667 0.75       0.                nan        nan
- 0.4        1.         1.         0.75      ]
-
-mean value: nan
-
-key: train_precision 
-value: [0.9375     0.85       0.82608696 0.95       0.85714286 0.95
- 0.85714286 0.86956522 0.85       0.88888889]
-
-mean value: 0.8836326777087646
-
-key: test_recall 
-value: [0.33333333 0.66666667 1.         0.                nan        nan
- 0.66666667 1.         0.5        1.        ]
-
-mean value: nan
-
-key: train_recall 
-value: [0.57692308 0.65384615 0.73076923 0.73076923 0.69230769 0.73076923
- 0.69230769 0.76923077 0.62962963 0.61538462]
-
-mean value: 0.6821937321937321
-
-key: test_roc_auc 
-value: [0.66666667 0.66666667 0.83333333 0.5               nan        nan
- 0.33333333 1.         0.75       0.75      ]
-
-mean value: nan
-
-key: train_roc_auc 
-value: [0.76923077 0.76923077 0.78846154 0.84615385 0.78846154 0.84615385
- 0.78846154 0.82692308 0.75712251 0.77065527]
-
-mean value: 0.7950854700854701
-
-key: test_jcc 
-value: [0.33333333 0.5        0.75       0.                nan        nan
- 0.33333333 1.         0.5        0.75      ]
-
-mean value: nan
-
-key: train_jcc 
-value: [0.55555556 0.5862069  0.63333333 0.7037037  0.62068966 0.7037037
- 0.62068966 0.68965517 0.56666667 0.57142857]
-
-mean value: 0.6251632913701879
-
-MCC on Blind test: 0.07
-
-Accuracy on Blind test: 0.55
-
-Model_name: SVM 
-Model func: SVC(random_state=42) 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
-              colsample_bynode=None, colsample_bytree=None,
-              enable_categorical=False, gamma=None, gpu_id=None,
-              importance_type=None, interaction_constraints=None,
-              learning_rate=None, max_delta_step=None, max_depth=None,
-              min_child_weight=None, missing=nan, monotone_constraints=None,
-              n_estimators=100, n_jobs=None, num_parallel_tree=None,
-              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
-              scale_pos_weight=None, subsample=None, tree_method=None,
-              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model', SVC(random_state=42))])
-
-key: fit_time 
-value: [0.00861406 0.00843453 0.00842524 0.00845695 0.00851583 0.00869155
- 0.01020479 0.008883   0.0094862  0.00905514]
-
-mean value: 0.008876729011535644
-
-key: score_time 
-value: [0.00841975 0.00838161 0.00835943 0.00834155 0.00417018 0.00427651
- 0.0093472  0.00916672 0.00854087 0.00875092]
-
-mean value: 0.0077754735946655275
-
-key: test_mcc 
-value: [ 0.          0.33333333  0.4472136   0.4472136          nan         nan
- -0.4472136   0.70710678  0.61237244  1.        ]
-
-mean value: nan
-
-key: train_mcc 
-value: [0.9258201  0.89056356 0.9258201  0.89056356 0.92307692 0.9258201
- 0.84615385 0.80829038 0.89271208 0.89227454]
-
-mean value: 0.8921095178279635
-
-key: test_accuracy 
-value: [0.5        0.66666667 0.66666667 0.66666667        nan        nan
- 0.33333333 0.83333333 0.8        1.        ]
-
-mean value: nan
-
-key: train_accuracy 
-value: [0.96153846 0.94230769 0.96153846 0.94230769 0.96153846 0.96153846
- 0.92307692 0.90384615 0.94339623 0.94339623]
-
-mean value: 0.9444484760522497
-
-key: test_fscore 
-value: [0.4        0.66666667 0.75       0.5               nan        nan
- 0.5        0.8        0.66666667 1.        ]
-
-mean value: nan
-
-key: train_fscore 
-value: [0.96       0.93877551 0.96       0.93877551 0.96153846 0.96
- 0.92307692 0.90566038 0.94117647 0.93877551]
-
-mean value: 0.9427778763174356
-
-key: test_precision 
-value: [0.5        0.66666667 0.6        1.                nan        nan
- 0.4        1.         1.         1.        ]
-
-mean value: nan
-
-key: train_precision 
-value: [1.         1.         1.         1.         0.96153846 1.
- 0.92307692 0.88888889 1.         1.        ]
-
-mean value: 0.9773504273504273
-
-key: test_recall 
-value: [0.33333333 0.66666667 1.         0.33333333        nan        nan
- 0.66666667 0.66666667 0.5        1.        ]
-
-mean value: nan
-
-key: train_recall 
-value: [0.92307692 0.88461538 0.92307692 0.88461538 0.96153846 0.92307692
- 0.92307692 0.92307692 0.88888889 0.88461538]
-
-mean value: 0.911965811965812
-
-key: test_roc_auc 
-value: [0.5        0.66666667 0.66666667 0.66666667        nan        nan
- 0.33333333 0.83333333 0.75       1.        ]
-
-mean value: nan
-
-key: train_roc_auc 
-value: [0.96153846 0.94230769 0.96153846 0.94230769 0.96153846 0.96153846
- 0.92307692 0.90384615 0.94444444 0.94230769]
-
-mean value: 0.9444444444444444
-
-key: test_jcc 
-value: [0.25       0.5        0.6        0.33333333        nan        nan
- 0.33333333 0.66666667 0.5        1.        ]
-
-mean value: nan
-
-key: train_jcc 
-value: [0.92307692 0.88461538 0.92307692 0.88461538 0.92592593 0.92307692
- 0.85714286 0.82758621 0.88888889 0.88461538]
-
-mean value: 0.8922620801931147
-
-MCC on Blind test: -0.07
-
-Accuracy on Blind test: 0.45
-
-Model_name: MLP 
-Model func: MLPClassifier(max_iter=500, random_state=42) 
-List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
-  warnings.warn(
-[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
-              colsample_bynode=None, colsample_bytree=None,
-              enable_categorical=False, gamma=None, gpu_id=None,
-              importance_type=None, interaction_constraints=None,
-              learning_rate=None, max_delta_step=None, max_depth=None,
-              min_child_weight=None, missing=nan, monotone_constraints=None,
-              n_estimators=100, n_jobs=None, num_parallel_tree=None,
-              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
-              scale_pos_weight=None, subsample=None, tree_method=None,
-              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model', MLPClassifier(max_iter=500, random_state=42))])
-
-key: fit_time 
-value: [0.257622   0.25290346 0.26910257 0.36610937 0.23078704 0.28138971
- 0.24945307 0.25755453 0.2490387  0.25744033]
-
-mean value: 0.2671400785446167
-
-key: score_time 
-value: [0.01196766 0.01187682 0.01183176 0.01191616 0.0065763  0.00656652
- 0.01190734 0.01188445 0.0118742  0.01191521]
-
-mean value: 0.010831642150878906
-
-key: test_mcc 
-value: [0.33333333 0.33333333 0.4472136  0.70710678        nan        nan
- 0.         1.         0.61237244 1.        ]
-
-mean value: nan
-
-key: train_mcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_accuracy 
-value: [0.66666667 0.66666667 0.66666667 0.83333333        nan        nan
- 0.5        1.         0.8        1.        ]
-
-mean value: nan
-
-key: train_accuracy 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_fscore 
-value: [0.66666667 0.66666667 0.75       0.8               nan        nan
- 0.4        1.         0.66666667 1.        ]
-
-mean value: nan
-
-key: train_fscore 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_precision 
-value: [0.66666667 0.66666667 0.6        1.                nan        nan
- 0.5        1.         1.         1.        ]
-
-mean value: nan
-
-key: train_precision 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_recall 
-value: [0.66666667 0.66666667 1.         0.66666667        nan        nan
- 0.33333333 1.         0.5        1.        ]
-
-mean value: nan
-
-key: train_recall 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_roc_auc 
-value: [0.66666667 0.66666667 0.66666667 0.83333333        nan        nan
- 0.5        1.         0.75       1.        ]
-
-mean value: nan
-
-key: train_roc_auc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_jcc 
-value: [0.5        0.5        0.6        0.66666667        nan        nan
- 0.25       1.         0.5        1.        ]
-
-mean value: nan
-
-key: train_jcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-MCC on Blind test: 0.07
-
-Accuracy on Blind test: 0.52
-
-Model_name: Decision Tree 
-Model func: DecisionTreeClassifier(random_state=42) 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
-              colsample_bynode=None, colsample_bytree=None,
-              enable_categorical=False, gamma=None, gpu_id=None,
-              importance_type=None, interaction_constraints=None,
-              learning_rate=None, max_delta_step=None, max_depth=None,
-              min_child_weight=None, missing=nan, monotone_constraints=None,
-              n_estimators=100, n_jobs=None, num_parallel_tree=None,
-              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
-              scale_pos_weight=None, subsample=None, tree_method=None,
-              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
-  warnings.warn(
-Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model', DecisionTreeClassifier(random_state=42))])
-
-key: fit_time 
-value: [0.01336122 0.01300812 0.00985575 0.0098536  0.00926661 0.00887156
- 0.00901055 0.00941229 0.00927663 0.00903344]
-
-mean value: 0.010094976425170899
-
-key: score_time 
-value: [0.0113616  0.0098033  0.00913811 0.00857115 0.00427389 0.00418425
- 0.00839829 0.00844026 0.00838804 0.00840545]
-
-mean value: 0.008096432685852051
-
-key: test_mcc 
-value: [0.33333333 1.         0.70710678 1.                nan        nan
- 0.70710678 0.70710678 0.61237244 1.        ]
-
-mean value: nan
-
-key: train_mcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_accuracy 
-value: [0.66666667 1.         0.83333333 1.                nan        nan
- 0.83333333 0.83333333 0.8        1.        ]
-
-mean value: nan
-
-key: train_accuracy 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_fscore 
-value: [0.66666667 1.         0.8        1.                nan        nan
- 0.85714286 0.8        0.66666667 1.        ]
-
-mean value: nan
-
-key: train_fscore 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_precision 
-value: [0.66666667 1.         1.         1.                nan        nan
- 0.75       1.         1.         1.        ]
-
-mean value: nan
-
-key: train_precision 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_recall 
-value: [0.66666667 1.         0.66666667 1.                nan        nan
- 1.         0.66666667 0.5        1.        ]
-
-mean value: nan
-
-key: train_recall 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_roc_auc 
-value: [0.66666667 1.         0.83333333 1.                nan        nan
- 0.83333333 0.83333333 0.75       1.        ]
-
-mean value: nan
-
-key: train_roc_auc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_jcc 
-value: [0.5        1.         0.66666667 1.                nan        nan
- 0.75       0.66666667 0.5        1.        ]
-
-mean value: nan
-
-key: train_jcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-MCC on Blind test: 0.73
-
-Accuracy on Blind test: 0.88
-
 Model_name: Extra Trees 
 Model func: ExtraTreesClassifier(random_state=42) 
 List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
@@ -18018,20 +13101,20 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', ExtraTreesClassifier(random_state=42))])
 
 key: fit_time 
-value: [0.07744193 0.0774622  0.07790446 0.07826352 0.07741332 0.07785106
- 0.07745719 0.07764459 0.07778835 0.07841229]
+value: [0.09140396 0.11081243 0.1061058  0.1074965  0.10002065 0.08645439
+ 0.08668065 0.08711362 0.08664465 0.08567667]
 
-mean value: 0.07776389122009278
+mean value: 0.09484093189239502
 
 key: score_time 
-value: [0.01661301 0.0167923  0.01716065 0.01694202 0.00452185 0.00454688
- 0.0172863  0.01668048 0.016675   0.01684666]
+value: [0.01851869 0.01910639 0.00513697 0.01879001 0.02097702 0.01896
+ 0.01881933 0.01879549 0.01810431 0.01854086]
 
-mean value: 0.014406514167785645
+mean value: 0.01757490634918213
 
 key: test_mcc 
-value: [0.         0.33333333 0.70710678 0.33333333        nan        nan
- 0.4472136  0.70710678 0.61237244 1.        ]
+value: [0.81649658 0.6               nan 0.6        0.81649658 0.81649658
+ 0.40824829 0.40824829 0.40824829 0.81649658]
 
 mean value: nan
 
@@ -18041,8 +13124,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_accuracy 
-value: [0.5        0.66666667 0.83333333 0.66666667        nan        nan
- 0.66666667 0.83333333 0.8        1.        ]
+value: [0.9 0.8 nan 0.8 0.9 0.9 0.7 0.7 0.7 0.9]
 
 mean value: nan
 
@@ -18052,8 +13134,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_fscore 
-value: [0.4        0.66666667 0.85714286 0.66666667        nan        nan
- 0.75       0.8        0.66666667 1.        ]
+value: [0.88888889 0.8               nan 0.8        0.90909091 0.88888889
+ 0.66666667 0.72727273 0.72727273 0.88888889]
 
 mean value: nan
 
@@ -18063,8 +13145,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_precision 
-value: [0.5        0.66666667 0.75       0.66666667        nan        nan
- 0.6        1.         1.         1.        ]
+value: [1.         0.8               nan 0.8        0.83333333 1.
+ 0.75       0.66666667 0.66666667 1.        ]
 
 mean value: nan
 
@@ -18074,8 +13156,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_recall 
-value: [0.33333333 0.66666667 1.         0.66666667        nan        nan
- 1.         0.66666667 0.5        1.        ]
+value: [0.8 0.8 nan 0.8 1.  0.8 0.6 0.8 0.8 0.8]
 
 mean value: nan
 
@@ -18085,8 +13166,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_roc_auc 
-value: [0.5        0.66666667 0.83333333 0.66666667        nan        nan
- 0.66666667 0.83333333 0.75       1.        ]
+value: [0.9 0.8 nan 0.8 0.9 0.9 0.7 0.7 0.7 0.9]
 
 mean value: nan
 
@@ -18096,8 +13176,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_jcc 
-value: [0.25       0.5        0.75       0.5               nan        nan
- 0.6        0.66666667 0.5        1.        ]
+value: [0.8        0.66666667        nan 0.66666667 0.83333333 0.8
+ 0.5        0.57142857 0.57142857 0.8       ]
 
 mean value: nan
 
@@ -18105,7 +13185,14 @@ key: train_jcc
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 
 mean value: 1.0
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+
+MCC on Blind test: 0.49
+
+Accuracy on Blind test: 0.78
+
+Model_name: Extra Tree 
+Model func: ExtraTreeClassifier(random_state=42) 
+List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
 Traceback (most recent call last):
   File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
     return cache[method]
@@ -18153,62 +13240,7 @@ Traceback (most recent call last):
 ValueError: Found unknown categories ['XDR'] in column 5 during transform
 
   warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
-  warnings.warn(
-
-MCC on Blind test: 0.21
-
-Accuracy on Blind test: 0.57
-
-Model_name: Extra Tree 
-Model func: ExtraTreeClassifier(random_state=42) 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
                        n_estimators=1000, n_jobs=10, oob_score=True,
                        random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
               colsample_bynode=None, colsample_bytree=None,
@@ -18237,20 +13269,20 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', ExtraTreeClassifier(random_state=42))])
 
 key: fit_time 
-value: [0.00849891 0.00832915 0.0084703  0.00833631 0.00853562 0.00831246
- 0.00834322 0.00833392 0.00853539 0.00829196]
+value: [0.00981331 0.00926542 0.00961494 0.00931954 0.00935292 0.00946355
+ 0.00947499 0.00947022 0.00934291 0.00956202]
 
-mean value: 0.008398723602294923
+mean value: 0.009467983245849609
 
 key: score_time 
-value: [0.00839496 0.00838709 0.00878024 0.0090816  0.00428891 0.00421572
- 0.00838637 0.00845718 0.00835276 0.00842619]
+value: [0.00965023 0.00959539 0.00480247 0.00923729 0.0094893  0.00876379
+ 0.00931597 0.00915647 0.00923562 0.00858641]
 
-mean value: 0.007677102088928222
+mean value: 0.008783292770385743
 
 key: test_mcc 
-value: [0.         0.33333333 0.4472136  0.                nan        nan
- 0.4472136  0.         0.61237244 0.61237244]
+value: [0.65465367 0.                nan 0.81649658 0.81649658 0.81649658
+ 0.2        0.5        0.6        0.81649658]
 
 mean value: nan
 
@@ -18260,8 +13292,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_accuracy 
-value: [0.5        0.66666667 0.66666667 0.5               nan        nan
- 0.66666667 0.5        0.8        0.8       ]
+value: [0.8 0.5 nan 0.9 0.9 0.9 0.6 0.7 0.8 0.9]
 
 mean value: nan
 
@@ -18271,8 +13302,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_fscore 
-value: [0.4        0.66666667 0.75       0.57142857        nan        nan
- 0.5        0.4        0.66666667 0.85714286]
+value: [0.83333333 0.61538462        nan 0.88888889 0.90909091 0.90909091
+ 0.6        0.76923077 0.8        0.88888889]
 
 mean value: nan
 
@@ -18282,8 +13313,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_precision 
-value: [0.5        0.66666667 0.6        0.5               nan        nan
- 1.         0.5        1.         0.75      ]
+value: [0.71428571 0.5               nan 1.         0.83333333 0.83333333
+ 0.6        0.625      0.8        1.        ]
 
 mean value: nan
 
@@ -18293,8 +13324,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_recall 
-value: [0.33333333 0.66666667 1.         0.66666667        nan        nan
- 0.33333333 0.33333333 0.5        1.        ]
+value: [1.  0.8 nan 0.8 1.  1.  0.6 1.  0.8 0.8]
 
 mean value: nan
 
@@ -18304,8 +13334,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_roc_auc 
-value: [0.5        0.66666667 0.66666667 0.5               nan        nan
- 0.66666667 0.5        0.75       0.75      ]
+value: [0.8 0.5 nan 0.9 0.9 0.9 0.6 0.7 0.8 0.9]
 
 mean value: nan
 
@@ -18315,8 +13344,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_jcc 
-value: [0.25       0.5        0.6        0.4               nan        nan
- 0.33333333 0.25       0.5        0.75      ]
+value: [0.71428571 0.44444444        nan 0.8        0.83333333 0.83333333
+ 0.42857143 0.625      0.66666667 0.8       ]
 
 mean value: nan
 
@@ -18325,7 +13354,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 
 mean value: 1.0
 
-MCC on Blind test: 0.06
+MCC on Blind test: 0.03
 
 Accuracy on Blind test: 0.5
 
@@ -18391,58 +13420,6 @@ Traceback (most recent call last):
 ValueError: Found unknown categories ['XDR'] in column 5 during transform
 
   warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
-  warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
-  warn(
 /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
   warn(
 /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
@@ -18499,54 +13476,10 @@ ValueError: Found unknown categories ['XDR'] in column 5 during transform
   warnings.warn(
 /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
   warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
-  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
+  warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
+  warn(
 /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
   warn(
 /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
@@ -18573,20 +13506,20 @@ Pipeline(steps=[('prep',
                  RandomForestClassifier(n_estimators=1000, random_state=42))])
 
 key: fit_time 
-value: [0.9603796  0.96432114 0.96725059 0.96787596 0.9631319  0.97312737
- 0.96518373 0.96293473 0.96418524 0.96683455]
+value: [1.050071   1.07899761 1.01138568 1.02257371 1.02732587 1.01924753
+ 1.05872726 1.03712177 1.03975534 1.04403877]
 
-mean value: 0.9655224800109863
+mean value: 1.0389244556427002
 
 key: score_time 
-value: [0.14033937 0.08650279 0.0865438  0.08715081 0.0047214  0.0044682
- 0.08628559 0.08696723 0.08689618 0.08648324]
+value: [0.08884025 0.0888741  0.00447512 0.09609246 0.08854914 0.09480143
+ 0.10091877 0.09175038 0.0905838  0.08938766]
 
-mean value: 0.07563586235046386
+mean value: 0.0834273099899292
 
 key: test_mcc 
-value: [0.70710678 0.70710678 1.         0.70710678        nan        nan
- 0.70710678 0.70710678 0.16666667 0.66666667]
+value: [0.81649658 0.6               nan 0.81649658 1.         0.81649658
+ 0.65465367 0.40824829 0.40824829 0.81649658]
 
 mean value: nan
 
@@ -18596,8 +13529,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_accuracy 
-value: [0.83333333 0.83333333 1.         0.83333333        nan        nan
- 0.83333333 0.83333333 0.6        0.8       ]
+value: [0.9 0.8 nan 0.9 1.  0.9 0.8 0.7 0.7 0.9]
 
 mean value: nan
 
@@ -18607,8 +13539,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_fscore 
-value: [0.8        0.85714286 1.         0.8               nan        nan
- 0.85714286 0.8        0.5        0.8       ]
+value: [0.90909091 0.8               nan 0.88888889 1.         0.88888889
+ 0.75       0.72727273 0.72727273 0.88888889]
 
 mean value: nan
 
@@ -18618,7 +13550,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_precision 
-value: [1.   0.75 1.   1.    nan  nan 0.75 1.   0.5  1.  ]
+value: [0.83333333 0.8               nan 1.         1.         1.
+ 1.         0.66666667 0.66666667 1.        ]
 
 mean value: nan
 
@@ -18628,8 +13561,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_recall 
-value: [0.66666667 1.         1.         0.66666667        nan        nan
- 1.         0.66666667 0.5        0.66666667]
+value: [1.  0.8 nan 0.8 1.  0.8 0.6 0.8 0.8 0.8]
 
 mean value: nan
 
@@ -18639,8 +13571,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_roc_auc 
-value: [0.83333333 0.83333333 1.         0.83333333        nan        nan
- 0.83333333 0.83333333 0.58333333 0.83333333]
+value: [0.9 0.8 nan 0.9 1.  0.9 0.8 0.7 0.7 0.9]
 
 mean value: nan
 
@@ -18650,8 +13581,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_jcc 
-value: [0.66666667 0.75       1.         0.66666667        nan        nan
- 0.75       0.66666667 0.33333333 0.66666667]
+value: [0.83333333 0.66666667        nan 0.8        1.         0.8
+ 0.6        0.57142857 0.57142857 0.8       ]
 
 mean value: nan
 
@@ -18660,9 +13591,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 
 mean value: 1.0
 
-MCC on Blind test: 0.35
+MCC on Blind test: 0.55
 
-Accuracy on Blind test: 0.68
+Accuracy on Blind test: 0.8
 
 Model_name: Random Forest2 
 Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
@@ -18700,70 +13631,76 @@ Running model pipeline: Pipeline(steps=[('prep',
                                         oob_score=True, random_state=42))])
 
 key: fit_time 
-value: [0.83453751 0.86814737 0.89418793 0.84960437 0.85452509 0.79625249
- 0.84640026 0.8471365  0.86017728 0.86971331]
+value: [0.86844516 0.87865114 0.92193866 0.84994102 0.85754681 0.82041979
+ 0.87674642 0.89583445 0.9129231  0.9018712 ]
 
-mean value: 0.8520682096481323
+mean value: 0.87843177318573
 
 key: score_time 
-value: [0.18317986 0.18850303 0.23107004 0.17553663 0.00471902 0.00476313
- 0.23304439 0.20270252 0.2285862  0.14570665]
+value: [0.17786026 0.21679783 0.00455117 0.22086644 0.21914244 0.20099974
+ 0.23634052 0.21746397 0.24935365 0.14481735]
 
-mean value: 0.1597811460494995
+mean value: 0.1888193368911743
 
 key: test_mcc 
-value: [0.         0.70710678 1.         0.70710678        nan        nan
- 0.70710678 1.         0.16666667 0.66666667]
+value: [0.81649658 0.6               nan 0.6        1.         1.
+ 0.40824829 0.40824829 0.6        0.81649658]
 
 mean value: nan
 
 key: train_mcc 
-value: [0.96225045 1.         1.         1.         1.         1.
- 1.         1.         1.         1.        ]
+value: [0.97801929 0.95555556 0.91201231 0.93356387 0.97801929 0.95555556
+ 0.95555556 0.95555556 0.97801929 0.93356387]
 
-mean value: 0.9962250448649377
+mean value: 0.9535420155810868
 
 key: test_accuracy 
-value: [0.5        0.83333333 1.         0.83333333        nan        nan
- 0.83333333 1.         0.6        0.8       ]
+value: [0.9 0.8 nan 0.8 1.  1.  0.7 0.7 0.8 0.9]
 
 mean value: nan
 
 key: train_accuracy 
-value: [0.98076923 1.         1.         1.         1.         1.
- 1.         1.         1.         1.        ]
+value: [0.98888889 0.97777778 0.95555556 0.96666667 0.98888889 0.97777778
+ 0.97777778 0.97777778 0.98888889 0.96666667]
 
-mean value: 0.9980769230769231
+mean value: 0.9766666666666667
 
 key: test_fscore 
-value: [0.4        0.85714286 1.         0.8               nan        nan
- 0.85714286 1.         0.5        0.8       ]
+value: [0.90909091 0.8               nan 0.8        1.         1.
+ 0.66666667 0.72727273 0.8        0.88888889]
 
 mean value: nan
 
 key: train_fscore 
-value: [0.98039216 1.         1.         1.         1.         1.
- 1.         1.         1.         1.        ]
+value: [0.98876404 0.97777778 0.95454545 0.96629213 0.98901099 0.97777778
+ 0.97777778 0.97777778 0.98876404 0.96629213]
 
-mean value: 0.9980392156862745
+mean value: 0.9764779914218117
 
 key: test_precision 
-value: [0.5  0.75 1.   1.    nan  nan 0.75 1.   0.5  1.  ]
+value: [0.83333333 0.8               nan 0.8        1.         1.
+ 0.75       0.66666667 0.8        1.        ]
 
 mean value: nan
 
 key: train_precision 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+value: [1.         0.97777778 0.97674419 0.97727273 0.97826087 0.97777778
+ 0.97777778 0.97777778 1.         0.97727273]
 
-mean value: 1.0
+mean value: 0.9820661621268294
 
 key: test_recall 
-value: [0.33333333 1.         1.         0.66666667        nan        nan
- 1.         1.         0.5        0.66666667]
+value: [1.  0.8 nan 0.8 1.  1.  0.6 0.8 0.8 0.8]
 
 mean value: nan
 
 key: train_recall 
+value: [0.97777778 0.97777778 0.93333333 0.95555556 1.         0.97777778
+ 0.97777778 0.97777778 0.97777778 0.95555556]
+
+mean value: 0.9711111111111111
+
+key: test_roc_auc 
 value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
   warn(
 /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
@@ -18814,86 +13751,31 @@ Traceback (most recent call last):
 ValueError: Found unknown categories ['XDR'] in column 5 during transform
 
   warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
-  warnings.warn(
-[0.96153846 1.         1.         1.         1.         1.
- 1.         1.         1.         1.        ]
-
-mean value: 0.9961538461538462
-
-key: test_roc_auc 
-value: [0.5        0.83333333 1.         0.83333333        nan        nan
- 0.83333333 1.         0.58333333 0.83333333]
+[0.9 0.8 nan 0.8 1.  1.  0.7 0.7 0.8 0.9]
 
 mean value: nan
 
 key: train_roc_auc 
-value: [0.98076923 1.         1.         1.         1.         1.
- 1.         1.         1.         1.        ]
+value: [0.98888889 0.97777778 0.95555556 0.96666667 0.98888889 0.97777778
+ 0.97777778 0.97777778 0.98888889 0.96666667]
 
-mean value: 0.9980769230769231
+mean value: 0.9766666666666667
 
 key: test_jcc 
-value: [0.25       0.75       1.         0.66666667        nan        nan
- 0.75       1.         0.33333333 0.66666667]
+value: [0.83333333 0.66666667        nan 0.66666667 1.         1.
+ 0.5        0.57142857 0.66666667 0.8       ]
 
 mean value: nan
 
 key: train_jcc 
-value: [0.96153846 1.         1.         1.         1.         1.
- 1.         1.         1.         1.        ]
+value: [0.97777778 0.95652174 0.91304348 0.93478261 0.97826087 0.95652174
+ 0.95652174 0.95652174 0.97777778 0.93478261]
 
-mean value: 0.9961538461538462
+mean value: 0.9542512077294686
 
-MCC on Blind test: 0.31
+MCC on Blind test: 0.55
 
-Accuracy on Blind test: 0.65
+Accuracy on Blind test: 0.8
 
 Model_name: Naive Bayes 
 Model func: BernoulliNB() 
@@ -18926,104 +13808,101 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', BernoulliNB())])
 
 key: fit_time 
-value: [0.02309561 0.00960851 0.00958014 0.00926995 0.00950646 0.01018548
- 0.00907755 0.00863814 0.00857759 0.00869775]
+value: [0.02026916 0.00899649 0.00874615 0.00913286 0.00929546 0.00914478
+ 0.00965142 0.00860023 0.00842547 0.00850058]
 
-mean value: 0.010623717308044433
+mean value: 0.010076260566711426
 
 key: score_time 
-value: [0.01342392 0.00879192 0.01012683 0.00904608 0.00474715 0.00480556
- 0.00867152 0.00860524 0.0085516  0.00846505]
+value: [0.00897002 0.00989938 0.0045855  0.00872135 0.00897264 0.00869513
+ 0.00925016 0.00857472 0.00852871 0.00845027]
 
-mean value: 0.00852348804473877
+mean value: 0.008464789390563965
 
 key: test_mcc 
-value: [ 0.          0.          0.33333333  0.                 nan         nan
-  0.70710678  0.70710678 -0.40824829  0.61237244]
+value: [0.65465367 0.21821789        nan 0.         0.40824829 0.65465367
+ 0.21821789 0.         0.40824829 0.40824829]
 
 mean value: nan
 
 key: train_mcc 
-value: [0.69436507 0.71151247 0.70064905 0.77849894 0.77151675 0.81312325
- 0.80829038 0.77151675 0.71778392 0.77540056]
+value: [0.57906602 0.73624773 0.57906602 0.60540551 0.60238451 0.56056066
+ 0.64700558 0.67082039 0.62609903 0.64700558]
 
-mean value: 0.7542657149596319
+mean value: 0.6253661066190971
 
 key: test_accuracy 
-value: [0.5        0.5        0.66666667 0.5               nan        nan
- 0.83333333 0.83333333 0.4        0.8       ]
+value: [0.8 0.6 nan 0.5 0.7 0.8 0.6 0.5 0.7 0.7]
 
 mean value: nan
 
 key: train_accuracy 
-value: [0.84615385 0.84615385 0.84615385 0.88461538 0.88461538 0.90384615
- 0.90384615 0.88461538 0.8490566  0.88679245]
+value: [0.78888889 0.86666667 0.78888889 0.8        0.8        0.77777778
+ 0.82222222 0.83333333 0.81111111 0.82222222]
 
-mean value: 0.8735849056603774
+mean value: 0.8111111111111111
 
 key: test_fscore 
-value: [0.4        0.4        0.66666667 0.4               nan        nan
- 0.85714286 0.8        0.         0.85714286]
+value: [0.75       0.66666667        nan 0.54545455 0.72727273 0.75
+ 0.5        0.44444444 0.66666667 0.66666667]
 
 mean value: nan
 
 key: train_fscore 
-value: [0.84       0.82608696 0.83333333 0.875      0.88888889 0.89795918
- 0.90196078 0.88       0.83333333 0.88      ]
+value: [0.7816092  0.86046512 0.7816092  0.78571429 0.79069767 0.76190476
+ 0.81395349 0.82352941 0.8        0.81395349]
 
-mean value: 0.865656248006449
+mean value: 0.8013436617630212
 
 key: test_precision 
-value: [0.5        0.5        0.66666667 0.5               nan        nan
- 0.75       1.         0.         0.75      ]
+value: [1.         0.57142857        nan 0.5        0.66666667 1.
+ 0.66666667 0.5        0.75       0.75      ]
 
 mean value: nan
 
 key: train_precision 
-value: [0.875      0.95       0.90909091 0.95454545 0.85714286 0.95652174
- 0.92       0.91666667 0.95238095 0.91666667]
+value: [0.80952381 0.90243902 0.80952381 0.84615385 0.82926829 0.82051282
+ 0.85365854 0.875      0.85       0.85365854]
 
-mean value: 0.9208015245623942
+mean value: 0.8449738675958188
 
 key: test_recall 
-value: [0.33333333 0.33333333 0.66666667 0.33333333        nan        nan
- 1.         0.66666667 0.         1.        ]
+value: [0.6 0.8 nan 0.6 0.8 0.6 0.4 0.4 0.6 0.6]
 
 mean value: nan
 
 key: train_recall 
-value: [0.80769231 0.73076923 0.76923077 0.80769231 0.92307692 0.84615385
- 0.88461538 0.84615385 0.74074074 0.84615385]
+value: [0.75555556 0.82222222 0.75555556 0.73333333 0.75555556 0.71111111
+ 0.77777778 0.77777778 0.75555556 0.77777778]
 
-mean value: 0.8202279202279202
+mean value: 0.7622222222222222
 
 key: test_roc_auc 
-value: [0.5        0.5        0.66666667 0.5               nan        nan
- 0.83333333 0.83333333 0.33333333 0.75      ]
+value: [0.8 0.6 nan 0.5 0.7 0.8 0.6 0.5 0.7 0.7]
 
 mean value: nan
 
 key: train_roc_auc 
-value: [0.84615385 0.84615385 0.84615385 0.88461538 0.88461538 0.90384615
- 0.90384615 0.88461538 0.8511396  0.88603989]
+value: [0.78888889 0.86666667 0.78888889 0.8        0.8        0.77777778
+ 0.82222222 0.83333333 0.81111111 0.82222222]
 
-mean value: 0.8737179487179488
+mean value: 0.8111111111111111
 
 key: test_jcc 
-value: [0.25       0.25       0.5        0.25              nan        nan
- 0.75       0.66666667 0.         0.75      ]
+value: [0.6        0.5               nan 0.375      0.57142857 0.6
+ 0.33333333 0.28571429 0.5        0.5       ]
 
 mean value: nan
 
 key: train_jcc 
-value: [0.72413793 0.7037037  0.71428571 0.77777778 0.8        0.81481481
- 0.82142857 0.78571429 0.71428571 0.78571429]
+value: [0.64150943 0.75510204 0.64150943 0.64705882 0.65384615 0.61538462
+ 0.68627451 0.7        0.66666667 0.68627451]
 
-mean value: 0.764186279875935
+mean value: 0.6693626187775545
 
-MCC on Blind test: -0.03
+MCC on Blind test: 0.18
 
-Accuracy on Blind test: 0.5
+Accuracy on Blind test: 0.65
 
 Model_name: XGBoost 
 Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
@@ -19083,54 +13962,6 @@ Traceback (most recent call last):
     raise ValueError(msg)
 ValueError: Found unknown categories ['XDR'] in column 5 during transform
 
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
   warnings.warn(
 [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
                        n_estimators=1000, n_jobs=10, oob_score=True,
@@ -19165,20 +13996,20 @@ Running model pipeline: Pipeline(steps=[('prep',
                                validate_parameters=None, verbosity=0))])
 
 key: fit_time 
-value: [0.04081035 0.03964376 0.03409052 0.03344679 0.036654   0.0408752
- 0.04086256 0.03915644 0.03814292 0.04359317]
+value: [0.1605041  0.03366351 0.0360558  0.05030107 0.04023385 0.03848863
+ 0.09582305 0.07147694 0.0326159  0.06000638]
 
-mean value: 0.03872756958007813
+mean value: 0.06191692352294922
 
 key: score_time 
-value: [0.01037478 0.01039386 0.01099706 0.01184702 0.0050633  0.00477147
- 0.01168633 0.01027298 0.01030707 0.01144981]
+value: [0.01092339 0.01055479 0.00469685 0.01059723 0.01124191 0.01426959
+ 0.01316428 0.01329756 0.01405334 0.0105629 ]
 
-mean value: 0.009716367721557618
+mean value: 0.011336183547973633
 
 key: test_mcc 
-value: [1.         1.         0.70710678 1.                nan        nan
- 0.70710678 1.         1.         1.        ]
+value: [1.         0.81649658        nan 0.81649658 1.         0.81649658
+ 0.81649658 0.6        0.5        1.        ]
 
 mean value: nan
 
@@ -19188,8 +14019,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_accuracy 
-value: [1.         1.         0.83333333 1.                nan        nan
- 0.83333333 1.         1.         1.        ]
+value: [1.  0.9 nan 0.9 1.  0.9 0.9 0.8 0.7 1. ]
 
 mean value: nan
 
@@ -19199,8 +14029,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_fscore 
-value: [1.         1.         0.8        1.                nan        nan
- 0.85714286 1.         1.         1.        ]
+value: [1.         0.90909091        nan 0.88888889 1.         0.90909091
+ 0.88888889 0.8        0.76923077 1.        ]
 
 mean value: nan
 
@@ -19210,7 +14040,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_precision 
-value: [1.   1.   1.   1.    nan  nan 0.75 1.   1.   1.  ]
+value: [1.         0.83333333        nan 1.         1.         0.83333333
+ 1.         0.8        0.625      1.        ]
 
 mean value: nan
 
@@ -19220,8 +14051,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_recall 
-value: [1.         1.         0.66666667 1.                nan        nan
- 1.         1.         1.         1.        ]
+value: [1.  1.  nan 0.8 1.  1.  0.8 0.8 1.  1. ]
 
 mean value: nan
 
@@ -19231,8 +14061,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_roc_auc 
-value: [1.         1.         0.83333333 1.                nan        nan
- 0.83333333 1.         1.         1.        ]
+value: [1.  0.9 nan 0.9 1.  0.9 0.9 0.8 0.7 1. ]
 
 mean value: nan
 
@@ -19242,8 +14071,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_jcc 
-value: [1.         1.         0.66666667 1.                nan        nan
- 0.75       1.         1.         1.        ]
+value: [1.         0.83333333        nan 0.8        1.         0.83333333
+ 0.8        0.66666667 0.625      1.        ]
 
 mean value: nan
 
@@ -19326,54 +14155,6 @@ KeyError: 'predict'
 
 During handling of the above exception, another exception occurred:
 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
 Traceback (most recent call last):
   File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
     scores = scorer(estimator, X_test, y_test)
@@ -19413,54 +14194,6 @@ Traceback (most recent call last):
     raise ValueError(msg)
 ValueError: Found unknown categories ['XDR'] in column 5 during transform
 
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
   warnings.warn(
 Pipeline(steps=[('prep',
                  ColumnTransformer(remainder='passthrough',
@@ -19479,103 +14212,100 @@ Pipeline(steps=[('prep',
                 ('model', LinearDiscriminantAnalysis())])
 
 key: fit_time 
-value: [0.03194928 0.03370738 0.03454018 0.0320971  0.03061795 0.03505969
- 0.03510022 0.03437901 0.04472756 0.04004741]
+value: [0.04766369 0.03517723 0.03765726 0.03865457 0.03713489 0.0370903
+ 0.03802204 0.03782034 0.03993273 0.03703785]
 
-mean value: 0.03522257804870606
+mean value: 0.03861908912658692
 
 key: score_time 
-value: [0.02213168 0.0220046  0.02188015 0.0226388  0.00919867 0.00595093
- 0.02261281 0.022228   0.02384186 0.02255106]
+value: [0.02126861 0.02220416 0.01126933 0.02398229 0.02256036 0.02334976
+ 0.02145195 0.02393079 0.02469134 0.02361798]
 
-mean value: 0.019503855705261232
+mean value: 0.021832656860351563
 
 key: test_mcc 
-value: [0.70710678 0.70710678 0.         1.                nan        nan
- 1.         0.33333333 1.         0.61237244]
+value: [0.40824829 0.81649658        nan 0.40824829 1.         0.65465367
+ 0.81649658 0.65465367 0.21821789 0.6       ]
 
 mean value: nan
 
 key: train_mcc 
-value: [0.96225045 1.         1.         1.         1.         0.92307692
+value: [1.         0.97801929 1.         1.         1.         1.
  1.         1.         1.         1.        ]
 
-mean value: 0.9885327371726299
+mean value: 0.9978019293843652
 
 key: test_accuracy 
-value: [0.83333333 0.83333333 0.5        1.                nan        nan
- 1.         0.66666667 1.         0.8       ]
+value: [0.7 0.9 nan 0.7 1.  0.8 0.9 0.8 0.6 0.8]
 
 mean value: nan
 
 key: train_accuracy 
-value: [0.98076923 1.         1.         1.         1.         0.96153846
+value: [1.         0.98888889 1.         1.         1.         1.
  1.         1.         1.         1.        ]
 
-mean value: 0.9942307692307693
+mean value: 0.9988888888888889
 
 key: test_fscore 
-value: [0.85714286 0.85714286 0.57142857 1.                nan        nan
- 1.         0.66666667 1.         0.85714286]
+value: [0.72727273 0.88888889        nan 0.72727273 1.         0.83333333
+ 0.90909091 0.83333333 0.66666667 0.8       ]
 
 mean value: nan
 
 key: train_fscore 
-value: [0.98113208 1.         1.         1.         1.         0.96153846
+value: [1.         0.98901099 1.         1.         1.         1.
  1.         1.         1.         1.        ]
 
-mean value: 0.994267053701016
+mean value: 0.9989010989010989
 
 key: test_precision 
-value: [0.75       0.75       0.5        1.                nan        nan
- 1.         0.66666667 1.         0.75      ]
+value: [0.66666667 1.                nan 0.66666667 1.         0.71428571
+ 0.83333333 0.71428571 0.57142857 0.8       ]
 
 mean value: nan
 
 key: train_precision 
-value: [0.96296296 1.         1.         1.         1.         0.96153846
+value: [1.         0.97826087 1.         1.         1.         1.
  1.         1.         1.         1.        ]
 
-mean value: 0.9924501424501424
+mean value: 0.9978260869565218
 
 key: test_recall 
-value: [1.         1.         0.66666667 1.                nan        nan
- 1.         0.66666667 1.         1.        ]
+value: [0.8 0.8 nan 0.8 1.  1.  1.  1.  0.8 0.8]
 
 mean value: nan
 
 key: train_recall 
-value: [1.         1.         1.         1.         1.         0.96153846
- 1.         1.         1.         1.        ]
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 
-mean value: 0.9961538461538462
+mean value: 1.0
 
 key: test_roc_auc 
-value: [0.83333333 0.83333333 0.5        1.                nan        nan
- 1.         0.66666667 1.         0.75      ]
+value: [0.7 0.9 nan 0.7 1.  0.8 0.9 0.8 0.6 0.8]
 
 mean value: nan
 
 key: train_roc_auc 
-value: [0.98076923 1.         1.         1.         1.         0.96153846
+value: [1.         0.98888889 1.         1.         1.         1.
  1.         1.         1.         1.        ]
 
-mean value: 0.9942307692307693
+mean value: 0.9988888888888889
 
 key: test_jcc 
-value: [0.75 0.75 0.4  1.    nan  nan 1.   0.5  1.   0.75]
+value: [0.57142857 0.8               nan 0.57142857 1.         0.71428571
+ 0.83333333 0.71428571 0.5        0.66666667]
 
 mean value: nan
 
 key: train_jcc 
-value: [0.96296296 1.         1.         1.         1.         0.92592593
+value: [1.         0.97826087 1.         1.         1.         1.
  1.         1.         1.         1.        ]
 
-mean value: 0.9888888888888889
+mean value: 0.9978260869565218
 
-MCC on Blind test: 0.19
+MCC on Blind test: -0.1
 
-Accuracy on Blind test: 0.62
+Accuracy on Blind test: 0.48
 
 Model_name: Multinomial 
 Model func: MultinomialNB() 
@@ -19608,54 +14338,70 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', MultinomialNB())])
 
 key: fit_time 
-value: [0.0098958  0.00960755 0.008389   0.00844193 0.00868154 0.00891495
- 0.00903392 0.00872874 0.00954485 0.00864506]
+value: [0.01478887 0.00884414 0.00876284 0.00883818 0.00909948 0.00853968
+ 0.00871682 0.00872874 0.00913668 0.00861716]
 
-mean value: 0.008988332748413087
+mean value: 0.009407258033752442
 
 key: score_time 
-value: [0.00995326 0.00929236 0.00838757 0.00841975 0.00438452 0.00442958
- 0.00843525 0.00857377 0.00896025 0.00875926]
+value: [0.008955   0.00877619 0.00435042 0.00857329 0.00864887 0.0086658
+ 0.00855088 0.00852728 0.00936174 0.00867295]
 
-mean value: 0.007959556579589844
+mean value: 0.00830824375152588
 
 key: test_mcc 
-value: [0.         0.33333333 0.4472136  0.                nan        nan
- 0.         1.         0.61237244 1.        ]
+value: [0.21821789 0.40824829        nan 0.2        0.81649658 1.
+ 0.21821789 0.         0.40824829 0.21821789]
 
 mean value: nan
 
 key: train_mcc 
-value: [0.70064905 0.57735027 0.73131034 0.77151675 0.6172134  0.73131034
- 0.6172134  0.65433031 0.70042867 0.77540056]
+value: [0.64508188 0.53452248 0.55776344 0.55776344 0.53452248 0.51161666
+ 0.57906602 0.60238451 0.62609903 0.53452248]
 
-mean value: 0.6876723088831835
+mean value: 0.5683342428673076
 
 key: test_accuracy 
-value: [0.5        0.66666667 0.66666667 0.5               nan        nan
- 0.5        1.         0.8        1.        ]
+value: [0.6 0.7 nan 0.6 0.9 1.  0.6 0.5 0.7 0.6]
 
 mean value: nan
 
 key: train_accuracy 
-value: [0.84615385 0.78846154 0.86538462 0.88461538 0.80769231 0.86538462
- 0.80769231 0.82692308 0.8490566  0.88679245]
+value: [0.82222222 0.76666667 0.77777778 0.77777778 0.76666667 0.75555556
+ 0.78888889 0.8        0.81111111 0.76666667]
 
-mean value: 0.8428156748911466
+mean value: 0.7833333333333333
 
 key: test_fscore 
-value: [0.4        0.66666667 0.75       0.4               nan        nan
- 0.57142857 1.         0.66666667 1.        ]
+value: [0.66666667 0.72727273        nan 0.6        0.90909091 1.
+ 0.5        0.44444444 0.72727273 0.5       ]
 
 mean value: nan
 
 key: train_fscore 
-value: [0.83333333 0.78431373 0.8627451  0.88       0.8        0.8627451
- 0.8        0.82352941 0.84615385 0.88      ]
+value: [0.81818182 0.75862069 0.76744186 0.76744186 0.75862069 0.75
+ 0.7816092  0.79069767 0.8        0.75862069]
 
-mean value: 0.8372820512820512
+mean value: 0.7751234477898471
 
 key: test_precision 
+value: [0.57142857 0.66666667        nan 0.6        0.83333333 1.
+ 0.66666667 0.5        0.66666667 0.66666667]
+
+mean value: nan
+
+key: train_precision 
+value: [0.8372093  0.78571429 0.80487805 0.80487805 0.78571429 0.76744186
+ 0.80952381 0.82926829 0.85       0.78571429]
+
+mean value: 0.8060342219701266
+
+key: test_recall 
+value: [0.8 0.8 nan 0.6 1.  1.  0.4 0.4 0.8 0.4]
+
+mean value: nan
+
+key: train_recall 
 value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
 Traceback (most recent call last):
   File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
@@ -19664,6 +14410,403 @@ KeyError: 'predict'
 
 During handling of the above exception, another exception occurred:
 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
+[0.8        0.73333333 0.73333333 0.73333333 0.73333333 0.73333333
+ 0.75555556 0.75555556 0.75555556 0.73333333]
+
+mean value: 0.7466666666666666
+
+key: test_roc_auc 
+value: [0.6 0.7 nan 0.6 0.9 1.  0.6 0.5 0.7 0.6]
+
+mean value: nan
+
+key: train_roc_auc 
+value: [0.82222222 0.76666667 0.77777778 0.77777778 0.76666667 0.75555556
+ 0.78888889 0.8        0.81111111 0.76666667]
+
+mean value: 0.7833333333333333
+
+key: test_jcc 
+value: [0.5        0.57142857        nan 0.42857143 0.83333333 1.
+ 0.33333333 0.28571429 0.57142857 0.33333333]
+
+mean value: nan
+
+key: train_jcc 
+value: [0.69230769 0.61111111 0.62264151 0.62264151 0.61111111 0.6
+ 0.64150943 0.65384615 0.66666667 0.61111111]
+
+mean value: 0.6332946298984035
+
+MCC on Blind test: 0.05
+
+Accuracy on Blind test: 0.57
+
+Model_name: Passive Aggresive 
+Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42) 
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
+              gamma=0, gpu_id=-1, importance_type=None,
+              interaction_constraints='', learning_rate=0.300000012,
+              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
+              monotone_constraints='()', n_estimators=100, n_jobs=12,
+              num_parallel_tree=1, predictor='auto', random_state=42,
+              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
+              tree_method='exact', use_label_encoder=False,
+              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model',
+                 PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
+
+key: fit_time 
+value: [0.01021981 0.01365471 0.01286817 0.01448369 0.01383781 0.01418757
+ 0.01293206 0.01413035 0.0132041  0.01391315]
+
+mean value: 0.013343143463134765
+
+key: score_time 
+value: [0.00872874 0.01148295 0.00617766 0.01167202 0.01157427 0.01153183
+ 0.01140785 0.01148415 0.01151109 0.01141477]
+
+mean value: 0.010698533058166504
+
+key: test_mcc 
+value: [0.40824829 0.5               nan 0.40824829 0.81649658 0.81649658
+ 0.65465367 0.81649658 0.21821789 1.        ]
+
+mean value: nan
+
+key: train_mcc 
+value: [0.93356387 0.67202151 0.88910845 0.97801929 0.97801929 0.95650071
+ 0.95650071 0.81649658 0.95650071 0.87447463]
+
+mean value: 0.9011205769973302
+
+key: test_accuracy 
+value: [0.7 0.7 nan 0.7 0.9 0.9 0.8 0.9 0.6 1. ]
+
+mean value: nan
+
+key: train_accuracy 
+value: [0.96666667 0.81111111 0.94444444 0.98888889 0.98888889 0.97777778
+ 0.97777778 0.9        0.97777778 0.93333333]
+
+mean value: 0.9466666666666667
+
+key: test_fscore 
+value: [0.72727273 0.76923077        nan 0.66666667 0.90909091 0.88888889
+ 0.75       0.88888889 0.66666667 1.        ]
+
+mean value: nan
+
+key: train_fscore 
+value: [0.96629213 0.8411215  0.94382022 0.98876404 0.98901099 0.97727273
+ 0.97727273 0.88888889 0.97826087 0.92857143]
+
+mean value: 0.9479275530403464
+
+key: test_precision 
+value: [0.66666667 0.625             nan 0.75       0.83333333 1.
+ 1.         1.         0.57142857 1.        ]
+
+mean value: nan
+
+key: train_precision 
+value: [0.97727273 0.72580645 0.95454545 1.         0.97826087 1.
+ 1.         1.         0.95744681 1.        ]
+
+mean value: 0.9593332311506941
+
+key: test_recall 
+value: [0.8 1.  nan 0.6 1.  0.8 0.6 0.8 0.8 1. ]
+
+mean value: nan
+
+key: train_recall 
+value: [0.95555556 1.         0.93333333 0.97777778 1.         0.95555556
+ 0.95555556 0.8        1.         0.86666667]
+
+mean value: 0.9444444444444444
+
+key: test_roc_auc 
+value: [0.7 0.7 nan 0.7 0.9 0.9 0.8 0.9 0.6 1. ]
+
+mean value: nan
+
+key: train_roc_auc 
+value: [0.96666667 0.81111111 0.94444444 0.98888889 0.98888889 0.97777778
+ 0.97777778 0.9        0.97777778 0.93333333]
+
+mean value: 0.9466666666666668
+
+key: test_jcc 
+value: [0.57142857 0.625             nan 0.5        0.83333333 0.8
+ 0.6        0.8        0.5        1.        ]
+
+mean value: nan
+
+key: train_jcc 
+value: [0.93478261 0.72580645 0.89361702 0.97777778 0.97826087 0.95555556
+ 0.95555556 0.8        0.95744681 0.86666667]
+
+mean value: 0.9045469315216562
+
+MCC on Blind test: 0.42
+
+Accuracy on Blind test: 0.75
+
+Model_name: Stochastic GDescent 
+Model func: SGDClassifier(n_jobs=10, random_state=42) 
+List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
+[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
+              gamma=0, gpu_id=-1, importance_type=None,
+              interaction_constraints='', learning_rate=0.300000012,
+              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
+              monotone_constraints='()', n_estimators=100, n_jobs=12,
+              num_parallel_tree=1, predictor='auto', random_state=42,
+              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
+              tree_method='exact', use_label_encoder=False,
+              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model', SGDClassifier(n_jobs=10, random_state=42))])
+
+key: fit_time 
+value: [0.01397181 0.01273942 0.01281476 0.01423192 0.0125308  0.01231194
+ 0.01235962 0.01259017 0.01245999 0.01275468]
+
+mean value: 0.012876510620117188
+
+key: score_time 
+value: [0.01007605 0.01145029 0.00625587 0.01167154 0.01142955 0.01144981
+ 0.01150846 0.01144075 0.01146603 0.01146483]
+
+mean value: 0.010821318626403809
+
+key: test_mcc 
+value: [ 1.          0.40824829         nan -0.33333333  0.81649658  1.
+  0.65465367  0.81649658  0.21821789  1.        ]
+
+mean value: nan
+
+key: train_mcc 
+value: [0.91111111 0.93356387 0.93541435 0.33333333 0.95650071 0.91111111
+ 0.72486118 0.91473203 1.         0.97801929]
+
+mean value: 0.8598646994997842
+
+key: test_accuracy 
+value: [1.  0.7 nan 0.4 0.9 1.  0.8 0.9 0.6 1. ]
+
+mean value: nan
+
+key: train_accuracy 
+value: [0.95555556 0.96666667 0.96666667 0.6        0.97777778 0.95555556
+ 0.84444444 0.95555556 1.         0.98888889]
+
+mean value: 0.9211111111111111
+
+key: test_fscore 
+value: [1.         0.72727273        nan 0.         0.90909091 1.
+ 0.75       0.90909091 0.66666667 1.        ]
+
+mean value: nan
+
+key: train_fscore 
+value: [0.95555556 0.96629213 0.96551724 0.33333333 0.97826087 0.95555556
+ 0.81578947 0.95348837 1.         0.98876404]
+
+mean value: 0.8912556580941488
+
+key: test_precision 
+value: [1.         0.66666667        nan 0.         0.83333333 1.
+ 1.         0.83333333 0.57142857 1.        ]
+
+mean value: nan
+
+key: train_precision 
+value: [0.95555556 0.97727273 1.         1.         0.95744681 0.95555556
+ 1.         1.         1.         1.        ]
+
+mean value: 0.9845830646894477
+
+key: test_recall 
+value: [1.  0.8 nan 0.  1.  1.  0.6 1.  0.8 1. ]
+
+mean value: nan
+
+key: train_recall 
+value: [0.95555556 0.95555556 0.93333333 0.2        1.         0.95555556
+ 0.68888889 0.91111111 1.         0.97777778]
+
+mean value: 0.8577777777777778
+
+key: test_roc_auc 
+value: [1.  0.7 nan 0.4 0.9 1.  0.8 0.9 0.6 1. ]
+
+mean value: nan
+
+key: train_roc_auc 
+value: [0.95555556 0.96666667 0.96666667 0.6        0.97777778 0.95555556
+ 0.84444444 0.95555556 1.         0.98888889]
+
+mean value: 0.9211111111111112
+
+key: test_jcc 
+value: [1.         0.57142857        nan 0.         0.83333333 1.
+ 0.6        0.83333333 0.5        1.        ]
+
+mean value: nan
+
+key: train_jcc 
+value: [0.91489362 0.93478261 0.93333333 0.2        0.95744681 0.91489362
+ 0.68888889 0.91111111 1.         0.97777778]
+
+mean value: 0.8433127762359954
+
+MCC on Blind test: 0.36
+
+Accuracy on Blind test: 0.72
+
+Model_name: AdaBoost Classifier 
+Model func: AdaBoostClassifier(random_state=42) 
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
+              gamma=0, gpu_id=-1, importance_type=None,
+              interaction_constraints='', learning_rate=0.300000012,
+              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
+              monotone_constraints='()', n_estimators=100, n_jobs=12,
+              num_parallel_tree=1, predictor='auto', random_state=42,
+              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
+              tree_method='exact', use_label_encoder=False,
+              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
 Traceback (most recent call last):
   File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
     scores = scorer(estimator, X_test, y_test)
@@ -19749,61 +14892,117 @@ Traceback (most recent call last):
     X_int, X_mask = self._transform(
   File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
     raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
 
   warnings.warn(
-[0.5        0.66666667 0.6        0.5               nan        nan
- 0.5        1.         1.         1.        ]
+Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model', AdaBoostClassifier(random_state=42))])
+
+key: fit_time 
+value: [0.09748411 0.08118486 0.08655453 0.08261776 0.08569789 0.08832264
+ 0.08770275 0.0874753  0.09288096 0.08781147]
+
+mean value: 0.08777322769165039
+
+key: score_time 
+value: [0.01493359 0.01460385 0.0050025  0.01509309 0.01566195 0.01640439
+ 0.01560545 0.01591015 0.01580977 0.01584244]
+
+mean value: 0.01448671817779541
+
+key: test_mcc 
+value: [0.81649658 0.6               nan 0.81649658 1.         0.81649658
+ 0.81649658 0.40824829 0.5        1.        ]
+
+mean value: nan
+
+key: train_mcc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_accuracy 
+value: [0.9 0.8 nan 0.9 1.  0.9 0.9 0.7 0.7 1. ]
+
+mean value: nan
+
+key: train_accuracy 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_fscore 
+value: [0.90909091 0.8               nan 0.88888889 1.         0.90909091
+ 0.88888889 0.72727273 0.76923077 1.        ]
+
+mean value: nan
+
+key: train_fscore 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_precision 
+value: [0.83333333 0.8               nan 1.         1.         0.83333333
+ 1.         0.66666667 0.625      1.        ]
 
 mean value: nan
 
 key: train_precision 
-value: [0.90909091 0.8        0.88       0.91666667 0.83333333 0.88
- 0.83333333 0.84       0.88       0.91666667]
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 
-mean value: 0.8689090909090909
+mean value: 1.0
 
 key: test_recall 
-value: [0.33333333 0.66666667 1.         0.33333333        nan        nan
- 0.66666667 1.         0.5        1.        ]
+value: [1.  0.8 nan 0.8 1.  1.  0.8 0.8 1.  1. ]
 
 mean value: nan
 
 key: train_recall 
-value: [0.76923077 0.76923077 0.84615385 0.84615385 0.76923077 0.84615385
- 0.76923077 0.80769231 0.81481481 0.84615385]
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 
-mean value: 0.8084045584045584
+mean value: 1.0
 
 key: test_roc_auc 
-value: [0.5        0.66666667 0.66666667 0.5               nan        nan
- 0.5        1.         0.75       1.        ]
+value: [0.9 0.8 nan 0.9 1.  0.9 0.9 0.7 0.7 1. ]
 
 mean value: nan
 
 key: train_roc_auc 
-value: [0.84615385 0.78846154 0.86538462 0.88461538 0.80769231 0.86538462
- 0.80769231 0.82692308 0.8497151  0.88603989]
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 
-mean value: 0.8428062678062679
+mean value: 1.0
 
 key: test_jcc 
-value: [0.25 0.5  0.6  0.25  nan  nan 0.4  1.   0.5  1.  ]
+value: [0.83333333 0.66666667        nan 0.8        1.         0.83333333
+ 0.8        0.57142857 0.625      1.        ]
 
 mean value: nan
 
 key: train_jcc 
-value: [0.71428571 0.64516129 0.75862069 0.78571429 0.66666667 0.75862069
- 0.66666667 0.7        0.73333333 0.78571429]
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 
-mean value: 0.7214783622013877
+mean value: 1.0
 
-MCC on Blind test: 0.0
+MCC on Blind test: 0.78
 
-Accuracy on Blind test: 0.5
+Accuracy on Blind test: 0.9
 
-Model_name: Passive Aggresive 
-Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42) 
+Model_name: Bagging Classifier 
+Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42) 
 List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
                        n_estimators=1000, n_jobs=10, oob_score=True,
                        random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
@@ -19831,59 +15030,56 @@ Running model pipeline: Pipeline(steps=[('prep',
        'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
       dtype='object'))])),
                 ('model',
-                 PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
+                 BaggingClassifier(n_jobs=10, oob_score=True,
+                                   random_state=42))])
 
 key: fit_time 
-value: [0.00921822 0.01226902 0.01219916 0.01230478 0.01273918 0.01236653
- 0.01281667 0.01253247 0.01347136 0.0127697 ]
+value: [0.03620768 0.03868723 0.02701831 0.0291822  0.0329349  0.03723025
+ 0.05369878 0.03093934 0.03043032 0.04312468]
 
-mean value: 0.012268710136413574
+mean value: 0.03594536781311035
 
 key: score_time 
-value: [0.00918436 0.01127267 0.01138401 0.01152658 0.00610924 0.00630403
- 0.01162601 0.01155329 0.01138902 0.01136518]
+value: [0.01804686 0.01641774 0.00487351 0.02259183 0.02211642 0.03795409
+ 0.03202295 0.02688098 0.03348565 0.03866339]
 
-mean value: 0.01017143726348877
+mean value: 0.025305342674255372
 
 key: test_mcc 
-value: [0.33333333 0.70710678 0.4472136  0.70710678        nan        nan
- 0.4472136  1.         1.         0.61237244]
+value: [1.         0.81649658        nan 0.81649658 1.         0.81649658
+ 0.81649658 0.40824829 0.65465367 1.        ]
 
 mean value: nan
 
 key: train_mcc 
-value: [1.         0.89056356 1.         1.         1.         1.
- 1.         1.         1.         1.        ]
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 
-mean value: 0.9890563556561721
+mean value: 1.0
 
 key: test_accuracy 
-value: [0.66666667 0.83333333 0.66666667 0.83333333        nan        nan
- 0.66666667 1.         1.         0.8       ]
+value: [1.  0.9 nan 0.9 1.  0.9 0.9 0.7 0.8 1. ]
 
 mean value: nan
 
 key: train_accuracy 
-value: [1.         0.94230769 1.         1.         1.         1.
- 1.         1.         1.         1.        ]
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 
-mean value: 0.9942307692307693
+mean value: 1.0
 
 key: test_fscore 
-value: [0.66666667 0.8        0.75       0.8               nan        nan
- 0.5        1.         1.         0.85714286]
+value: [1.         0.90909091        nan 0.88888889 1.         0.90909091
+ 0.88888889 0.72727273 0.83333333 1.        ]
 
 mean value: nan
 
 key: train_fscore 
-value: [1.         0.93877551 1.         1.         1.         1.
- 1.         1.         1.         1.        ]
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 
-mean value: 0.9938775510204082
+mean value: 1.0
 
 key: test_precision 
-value: [0.66666667 1.         0.6        1.                nan        nan
- 1.         1.         1.         0.75      ]
+value: [1.         0.83333333        nan 1.         1.         0.83333333
+ 1.         0.66666667 0.71428571 1.        ]
 
 mean value: nan
 
@@ -19893,47 +15089,42 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_recall 
-value: [0.66666667 0.66666667 1.         0.66666667        nan        nan
- 0.33333333 1.         1.         1.        ]
+value: [1.  1.  nan 0.8 1.  1.  0.8 0.8 1.  1. ]
 
 mean value: nan
 
 key: train_recall 
-value: [1.         0.88461538 1.         1.         1.         1.
- 1.         1.         1.         1.        ]
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 
-mean value: 0.9884615384615385
+mean value: 1.0
 
 key: test_roc_auc 
-value: [0.66666667 0.83333333 0.66666667 0.83333333        nan        nan
- 0.66666667 1.         1.         0.75      ]
+value: [1.  0.9 nan 0.9 1.  0.9 0.9 0.7 0.8 1. ]
 
 mean value: nan
 
 key: train_roc_auc 
-value: [1.         0.94230769 1.         1.         1.         1.
- 1.         1.         1.         1.        ]
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 
-mean value: 0.9942307692307693
+mean value: 1.0
 
 key: test_jcc 
-value: [0.5        0.66666667 0.6        0.66666667        nan        nan
- 0.33333333 1.         1.         0.75      ]
+value: [1.         0.83333333        nan 0.8        1.         0.83333333
+ 0.8        0.57142857 0.71428571 1.        ]
 
 mean value: nan
 
 key: train_jcc 
-value: [1.         0.88461538 1.         1.         1.         1.
- 1.         1.         1.         1.        ]
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 
-mean value: 0.9884615384615385
+mean value: 1.0
 
-MCC on Blind test: 0.17
+MCC on Blind test: 0.95
 
-Accuracy on Blind test: 0.52
+Accuracy on Blind test: 0.98
 
-Model_name: Stochastic GDescent 
-Model func: SGDClassifier(n_jobs=10, random_state=42) 
+Model_name: Gaussian Process 
+Model func: GaussianProcessClassifier(random_state=42) 
 List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
 Traceback (most recent call last):
   File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
@@ -20027,7 +15218,7 @@ Traceback (most recent call last):
     X_int, X_mask = self._transform(
   File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
     raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
 
   warnings.warn(
 [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
@@ -20042,676 +15233,6 @@ ValueError: Found unknown categories ['Other'] in column 5 during transform
               reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
               tree_method='exact', use_label_encoder=False,
               validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model', SGDClassifier(n_jobs=10, random_state=42))])
-
-key: fit_time 
-value: [0.01245952 0.01190948 0.01206875 0.01187015 0.01200151 0.01223326
- 0.01400733 0.01236725 0.01193714 0.01187801]
-
-mean value: 0.012273240089416503
-
-key: score_time 
-value: [0.0105114  0.01141214 0.01137328 0.01140285 0.00604272 0.0061481
- 0.01152825 0.01161933 0.01146436 0.0113945 ]
-
-mean value: 0.010289692878723144
-
-key: test_mcc 
-value: [0.70710678 0.70710678 0.4472136  0.70710678        nan        nan
- 0.4472136  0.70710678 1.         1.        ]
-
-mean value: nan
-
-key: train_mcc 
-value: [1.         1.         1.         1.         1.         1.
- 0.54772256 0.79056942 1.         0.92704716]
-
-mean value: 0.926533913727155
-
-key: test_accuracy 
-value: [0.83333333 0.83333333 0.66666667 0.83333333        nan        nan
- 0.66666667 0.83333333 1.         1.        ]
-
-mean value: nan
-
-key: train_accuracy 
-value: [1.         1.         1.         1.         1.         1.
- 0.73076923 0.88461538 1.         0.96226415]
-
-mean value: 0.9577648766328012
-
-key: test_fscore 
-value: [0.85714286 0.85714286 0.75       0.8               nan        nan
- 0.5        0.8        1.         1.        ]
-
-mean value: nan
-
-key: train_fscore 
-value: [1.         1.         1.         1.         1.         1.
- 0.63157895 0.86956522 1.         0.96      ]
-
-mean value: 0.9461144164759725
-
-key: test_precision 
-value: [0.75 0.75 0.6  1.    nan  nan 1.   1.   1.   1.  ]
-
-mean value: nan
-
-key: train_precision 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_recall 
-value: [1.         1.         1.         0.66666667        nan        nan
- 0.33333333 0.66666667 1.         1.        ]
-
-mean value: nan
-
-key: train_recall 
-value: [1.         1.         1.         1.         1.         1.
- 0.46153846 0.76923077 1.         0.92307692]
-
-mean value: 0.9153846153846154
-
-key: test_roc_auc 
-value: [0.83333333 0.83333333 0.66666667 0.83333333        nan        nan
- 0.66666667 0.83333333 1.         1.        ]
-
-mean value: nan
-
-key: train_roc_auc 
-value: [1.         1.         1.         1.         1.         1.
- 0.73076923 0.88461538 1.         0.96153846]
-
-mean value: 0.9576923076923077
-
-key: test_jcc 
-value: [0.75       0.75       0.6        0.66666667        nan        nan
- 0.33333333 0.66666667 1.         1.        ]
-
-mean value: nan
-
-key: train_jcc 
-value: [1.         1.         1.         1.         1.         1.
- 0.46153846 0.76923077 1.         0.92307692]
-
-mean value: 0.9153846153846154
-
-MCC on Blind test: 0.24
-
-Accuracy on Blind test: 0.57
-
-Model_name: AdaBoost Classifier 
-Model func: AdaBoostClassifier(random_state=42) 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
-              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
-              gamma=0, gpu_id=-1, importance_type=None,
-              interaction_constraints='', learning_rate=0.300000012,
-              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
-              monotone_constraints='()', n_estimators=100, n_jobs=12,
-              num_parallel_tree=1, predictor='auto', random_state=42,
-              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
-              tree_method='exact', use_label_encoder=False,
-              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
-  warnings.warn(
-Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model', AdaBoostClassifier(random_state=42))])
-
-key: fit_time 
-value: [0.08268571 0.07317615 0.07111573 0.07205319 0.07330489 0.07189631
- 0.07267833 0.07267451 0.07413673 0.07375813]
-
-mean value: 0.07374796867370606
-
-key: score_time 
-value: [0.01459098 0.01475048 0.01495194 0.0147922  0.00463033 0.00474453
- 0.01556635 0.01505351 0.01488471 0.01495194]
-
-mean value: 0.012891697883605956
-
-key: test_mcc 
-value: [1.         1.         0.70710678 1.                nan        nan
- 0.70710678 1.         1.         1.        ]
-
-mean value: nan
-
-key: train_mcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_accuracy 
-value: [1.         1.         0.83333333 1.                nan        nan
- 0.83333333 1.         1.         1.        ]
-
-mean value: nan
-
-key: train_accuracy 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_fscore 
-value: [1.         1.         0.8        1.                nan        nan
- 0.85714286 1.         1.         1.        ]
-
-mean value: nan
-
-key: train_fscore 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_precision 
-value: [1.   1.   1.   1.    nan  nan 0.75 1.   1.   1.  ]
-
-mean value: nan
-
-key: train_precision 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_recall 
-value: [1.         1.         0.66666667 1.                nan        nan
- 1.         1.         1.         1.        ]
-
-mean value: nan
-
-key: train_recall 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_roc_auc 
-value: [1.         1.         0.83333333 1.                nan        nan
- 0.83333333 1.         1.         1.        ]
-
-mean value: nan
-
-key: train_roc_auc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_jcc 
-value: [1.         1.         0.66666667 1.                nan        nan
- 0.75       1.         1.         1.        ]
-
-mean value: nan
-
-key: train_jcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-MCC on Blind test: 0.73
-
-Accuracy on Blind test: 0.88
-
-Model_name: Bagging Classifier 
-Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42) 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
-              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
-              gamma=0, gpu_id=-1, importance_type=None,
-              interaction_constraints='', learning_rate=0.300000012,
-              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
-              monotone_constraints='()', n_estimators=100, n_jobs=12,
-              num_parallel_tree=1, predictor='auto', random_state=42,
-              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
-              tree_method='exact', use_label_encoder=False,
-              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
-                 ColumnTransformer(remainder='passthrough',
-                                   transformers=[('num', MinMaxScaler(),
-                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
-       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
-       'mcsm_na_affinity', 'rsa',
-       ...
-       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
-       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
-      dtype='object', length=167)),
-                                                 ('cat', OneHotEncoder(),
-                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
-       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
-      dtype='object'))])),
-                ('model',
-                 BaggingClassifier(n_jobs=10, oob_score=True,
-                                   random_state=42))])
-
-key: fit_time 
-value: [0.03236103 0.03303695 0.03448248 0.02975869 0.04348493 0.04276633
- 0.04121518 0.0335412  0.02573681 0.03820777]
-
-mean value: 0.035459136962890624
-
-key: score_time 
-value: [0.02469873 0.02373099 0.02471375 0.0250423  0.00463319 0.00474143
- 0.01605988 0.02687597 0.01719236 0.02195811]
-
-mean value: 0.018964672088623048
-
-key: test_mcc 
-value: [1.         1.         0.70710678 0.70710678        nan        nan
- 0.70710678 1.         1.         1.        ]
-
-mean value: nan
-
-key: train_mcc 
-value: [1.         0.96225045 0.96225045 1.         1.         1.
- 1.         1.         1.         1.        ]
-
-mean value: 0.9924500897298753
-
-key: test_accuracy 
-value: [1.         1.         0.83333333 0.83333333        nan        nan
- 0.83333333 1.         1.         1.        ]
-
-mean value: nan
-
-key: train_accuracy 
-value: [1.         0.98076923 0.98076923 1.         1.         1.
- 1.         1.         1.         1.        ]
-
-mean value: 0.9961538461538462
-
-key: test_fscore 
-value: [1.         1.         0.8        0.8               nan        nan
- 0.85714286 1.         1.         1.        ]
-
-mean value: nan
-
-key: train_fscore 
-value: [1.         0.98039216 0.98039216 1.         1.         1.
- 1.         1.         1.         1.        ]
-
-mean value: 0.996078431372549
-
-key: test_precision 
-value: [1.   1.   1.   1.    nan  nan 0.75 1.   1.   1.  ]
-
-mean value: nan
-
-key: train_precision 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
-
-mean value: 1.0
-
-key: test_recall 
-value: [1.         1.         0.66666667 0.66666667        nan        nan
- 1.         1.         1.         1.        ]
-
-mean value: nan
-
-key: train_recall 
-value: [1.         0.96153846 0.96153846 1.         1.         1.
- 1.         1.         1.         1.        ]
-
-mean value: 0.9923076923076923
-
-key: test_roc_auc 
-value: [1.         1.         0.83333333 0.83333333        nan        nan
- 0.83333333 1.         1.         1.        ]
-
-mean value: nan
-
-key: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
-  _warn_prf(average, modifier, msg_start, len(result))
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
-  warnings.warn(
-train_roc_auc 
-value: [1.         0.98076923 0.98076923 1.         1.         1.
- 1.         1.         1.         1.        ]
-
-mean value: 0.9961538461538462
-
-key: test_jcc 
-value: [1.         1.         0.66666667 0.66666667        nan        nan
- 0.75       1.         1.         1.        ]
-
-mean value: nan
-
-key: train_jcc 
-value: [1.         0.96153846 0.96153846 1.         1.         1.
- 1.         1.         1.         1.        ]
-
-mean value: 0.9923076923076923
-
-MCC on Blind test: 0.95
-
-Accuracy on Blind test: 0.98
-
-Model_name: Gaussian Process 
-Model func: GaussianProcessClassifier(random_state=42) 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
-                       n_estimators=1000, n_jobs=10, oob_score=True,
-                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
-              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
-              gamma=0, gpu_id=-1, importance_type=None,
-              interaction_constraints='', learning_rate=0.300000012,
-              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
-              monotone_constraints='()', n_estimators=100, n_jobs=12,
-              num_parallel_tree=1, predictor='auto', random_state=42,
-              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
-              tree_method='exact', use_label_encoder=False,
-              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
 Running model pipeline: Pipeline(steps=[('prep',
                  ColumnTransformer(remainder='passthrough',
                                    transformers=[('num', MinMaxScaler(),
@@ -20729,20 +15250,20 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', GaussianProcessClassifier(random_state=42))])
 
 key: fit_time 
-value: [0.01034498 0.01023507 0.01016378 0.010077   0.01010346 0.01009202
- 0.01004457 0.01008701 0.01006556 0.01012492]
+value: [0.01397204 0.02746344 0.01704359 0.01630187 0.01670456 0.01774883
+ 0.01657462 0.01662517 0.03054595 0.01816249]
 
-mean value: 0.010133838653564453
+mean value: 0.019114255905151367
 
 key: score_time 
-value: [0.00860214 0.0086844  0.00934911 0.00860739 0.00424981 0.00425982
- 0.00847292 0.00851202 0.00853586 0.008564  ]
+value: [0.01164699 0.01156616 0.00662804 0.01197863 0.01218772 0.01199245
+ 0.01217413 0.01204348 0.02131319 0.02032018]
 
-mean value: 0.007783746719360352
+mean value: 0.01318509578704834
 
 key: test_mcc 
-value: [-0.4472136   0.33333333  0.33333333  0.                 nan         nan
-  0.          0.4472136   0.16666667  0.66666667]
+value: [0.81649658 0.6               nan 0.2        0.65465367 0.81649658
+ 0.2        0.5        0.         0.81649658]
 
 mean value: nan
 
@@ -20752,8 +15273,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_accuracy 
-value: [0.33333333 0.66666667 0.66666667 0.5               nan        nan
- 0.5        0.66666667 0.6        0.8       ]
+value: [0.9 0.8 nan 0.6 0.8 0.9 0.6 0.7 0.5 0.9]
 
 mean value: nan
 
@@ -20763,8 +15283,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_fscore 
-value: [0.         0.66666667 0.66666667 0.                nan        nan
- 0.57142857 0.5        0.5        0.8       ]
+value: [0.88888889 0.8               nan 0.6        0.83333333 0.88888889
+ 0.6        0.76923077 0.61538462 0.88888889]
 
 mean value: nan
 
@@ -20774,8 +15294,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_precision 
-value: [0.         0.66666667 0.66666667 0.                nan        nan
- 0.5        1.         0.5        1.        ]
+value: [1.         0.8               nan 0.6        0.71428571 1.
+ 0.6        0.625      0.5        1.        ]
 
 mean value: nan
 
@@ -20785,8 +15305,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_recall 
-value: [0.         0.66666667 0.66666667 0.                nan        nan
- 0.66666667 0.33333333 0.5        0.66666667]
+value: [0.8 0.8 nan 0.6 1.  0.8 0.6 1.  0.8 0.8]
 
 mean value: nan
 
@@ -20796,8 +15315,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_roc_auc 
-value: [0.33333333 0.66666667 0.66666667 0.5               nan        nan
- 0.5        0.66666667 0.58333333 0.83333333]
+value: [0.9 0.8 nan 0.6 0.8 0.9 0.6 0.7 0.5 0.9]
 
 mean value: nan
 
@@ -20807,8 +15325,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_jcc 
-value: [0.         0.5        0.5        0.                nan        nan
- 0.4        0.33333333 0.33333333 0.66666667]
+value: [0.8        0.66666667        nan 0.42857143 0.71428571 0.8
+ 0.42857143 0.625      0.44444444 0.8       ]
 
 mean value: nan
 
@@ -20817,9 +15335,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 
 mean value: 1.0
 
-MCC on Blind test: 0.0
+MCC on Blind test: 0.01
 
-Accuracy on Blind test: 0.5
+Accuracy on Blind test: 0.52
 
 Model_name: Gradient Boosting 
 Model func: GradientBoostingClassifier(random_state=42) 
@@ -20835,221 +15353,7 @@ List of models: [('Logistic Regression', LogisticRegression(random_state=42)), (
               reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
               tree_method='exact', use_label_encoder=False,
               validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
-  warnings.warn("Variables are collinear")
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
-  warnings.warn("Variables are collinear")
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
-  warnings.warn("Variables are collinear")
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
-  warnings.warn("Variables are collinear")
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
-  warnings.warn("Variables are collinear")
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['XDR'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
-  warnings.warn("Variables are collinear")
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
-  warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
-  warnings.warn("Variables are collinear")
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
-  warnings.warn("Variables are collinear")
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
-  warnings.warn("Variables are collinear")
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
-  warnings.warn("Variables are collinear")
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
-  warnings.warn("Variables are collinear")
-Pipeline(steps=[('prep',
+Running model pipeline: Pipeline(steps=[('prep',
                  ColumnTransformer(remainder='passthrough',
                                    transformers=[('num', MinMaxScaler(),
                                                   Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
@@ -21066,20 +15370,92 @@ Pipeline(steps=[('prep',
                 ('model', GradientBoostingClassifier(random_state=42))])
 
 key: fit_time 
-value: [0.13452578 0.13629317 0.09958744 0.12072825 0.12380791 0.09092355
- 0.10838723 0.11978316 0.1374228  0.13621712]
+value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
+  warnings.warn("Variables are collinear")
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
+  warnings.warn("Variables are collinear")
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
+  warnings.warn("Variables are collinear")
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
 
-mean value: 0.12076764106750489
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
+  warnings.warn("Variables are collinear")
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
+  warnings.warn("Variables are collinear")
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
+  warnings.warn("Variables are collinear")
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
+  warnings.warn("Variables are collinear")
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
+  warnings.warn("Variables are collinear")
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
+  warnings.warn("Variables are collinear")
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
+  warnings.warn("Variables are collinear")
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
+  warnings.warn("Variables are collinear")
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
+  _warn_prf(average, modifier, msg_start, len(result))
+[0.20379448 0.20308447 0.20512509 0.20450664 0.19911671 0.19699526
+ 0.18761802 0.18994236 0.19499803 0.18119597]
+
+mean value: 0.19663770198822023
 
 key: score_time 
-value: [0.00908279 0.00892663 0.00909948 0.00916409 0.00457883 0.00456905
- 0.00913429 0.00951099 0.00891829 0.00896835]
+value: [0.00957632 0.01017761 0.00509214 0.01043415 0.01005363 0.00927019
+ 0.01004076 0.00947762 0.01011539 0.01008415]
 
-mean value: 0.008195281028747559
+mean value: 0.009432196617126465
 
 key: test_mcc 
-value: [0.70710678 0.70710678 0.70710678 1.                nan        nan
- 0.70710678 1.         1.         1.        ]
+value: [0.81649658 0.81649658        nan 0.6        1.         0.81649658
+ 0.81649658 0.40824829 0.65465367 1.        ]
 
 mean value: nan
 
@@ -21089,8 +15465,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_accuracy 
-value: [0.83333333 0.83333333 0.83333333 1.                nan        nan
- 0.83333333 1.         1.         1.        ]
+value: [0.9 0.9 nan 0.8 1.  0.9 0.9 0.7 0.8 1. ]
 
 mean value: nan
 
@@ -21100,8 +15475,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_fscore 
-value: [0.8        0.85714286 0.8        1.                nan        nan
- 0.85714286 1.         1.         1.        ]
+value: [0.88888889 0.90909091        nan 0.8        1.         0.90909091
+ 0.88888889 0.72727273 0.83333333 1.        ]
 
 mean value: nan
 
@@ -21111,7 +15486,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_precision 
-value: [1.   0.75 1.   1.    nan  nan 0.75 1.   1.   1.  ]
+value: [1.         0.83333333        nan 0.8        1.         0.83333333
+ 1.         0.66666667 0.71428571 1.        ]
 
 mean value: nan
 
@@ -21121,8 +15497,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_recall 
-value: [0.66666667 1.         0.66666667 1.                nan        nan
- 1.         1.         1.         1.        ]
+value: [0.8 1.  nan 0.8 1.  1.  0.8 0.8 1.  1. ]
 
 mean value: nan
 
@@ -21132,8 +15507,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_roc_auc 
-value: [0.83333333 0.83333333 0.83333333 1.                nan        nan
- 0.83333333 1.         1.         1.        ]
+value: [0.9 0.9 nan 0.8 1.  0.9 0.9 0.7 0.8 1. ]
 
 mean value: nan
 
@@ -21143,8 +15517,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_jcc 
-value: [0.66666667 0.75       0.66666667 1.                nan        nan
- 0.75       1.         1.         1.        ]
+value: [0.8        0.83333333        nan 0.66666667 1.         0.83333333
+ 0.8        0.57142857 0.71428571 1.        ]
 
 mean value: nan
 
@@ -21153,9 +15527,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 
 mean value: 1.0
 
-MCC on Blind test: 0.89
+MCC on Blind test: 0.84
 
-Accuracy on Blind test: 0.95
+Accuracy on Blind test: 0.92
 
 Model_name: QDA 
 Model func: QuadraticDiscriminantAnalysis() 
@@ -21188,20 +15562,20 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', QuadraticDiscriminantAnalysis())])
 
 key: fit_time 
-value: [0.00955558 0.00891328 0.00895953 0.00941253 0.00888014 0.00890732
- 0.00881815 0.00965381 0.00900769 0.01232696]
+value: [0.01176    0.01463223 0.01451039 0.01862359 0.0145371  0.01461315
+ 0.01455307 0.0146296  0.01553369 0.01768708]
 
-mean value: 0.00944349765777588
+mean value: 0.015107989311218262
 
 key: score_time 
-value: [0.00867105 0.00853944 0.00854969 0.00861311 0.00435209 0.00431705
- 0.0086236  0.00882101 0.0087564  0.01142502]
+value: [0.01168466 0.01204658 0.00626183 0.01207185 0.01193285 0.01204848
+ 0.01547551 0.01594973 0.0170939  0.01871085]
 
-mean value: 0.008066844940185548
+mean value: 0.013327622413635254
 
 key: test_mcc 
-value: [ 0.70710678 -0.4472136   0.70710678  0.33333333         nan         nan
- -0.4472136   0.          0.61237244  0.61237244]
+value: [0.81649658 0.81649658        nan 0.5        1.         0.81649658
+ 0.65465367 0.5        0.81649658 0.65465367]
 
 mean value: nan
 
@@ -21211,8 +15585,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_accuracy 
-value: [0.83333333 0.33333333 0.83333333 0.66666667        nan        nan
- 0.33333333 0.5        0.8        0.8       ]
+value: [0.9 0.9 nan 0.7 1.  0.9 0.8 0.7 0.9 0.8]
 
 mean value: nan
 
@@ -21222,8 +15595,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_fscore 
-value: [0.8        0.         0.85714286 0.66666667        nan        nan
- 0.5        0.57142857 0.66666667 0.85714286]
+value: [0.88888889 0.88888889        nan 0.57142857 1.         0.88888889
+ 0.75       0.57142857 0.88888889 0.75      ]
 
 mean value: nan
 
@@ -21233,8 +15606,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_precision 
-value: [1.         0.         0.75       0.66666667        nan        nan
- 0.4        0.5        1.         0.75      ]
+value: [ 1.  1. nan  1.  1.  1.  1.  1.  1.  1.]
 
 mean value: nan
 
@@ -21244,8 +15616,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_recall 
-value: [0.66666667 0.         1.         0.66666667        nan        nan
- 0.66666667 0.66666667 0.5        1.        ]
+value: [0.8 0.8 nan 0.4 1.  0.8 0.6 0.4 0.8 0.6]
 
 mean value: nan
 
@@ -21255,8 +15626,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_roc_auc 
-value: [0.83333333 0.33333333 0.83333333 0.66666667        nan        nan
- 0.33333333 0.5        0.75       0.75      ]
+value: [0.9 0.9 nan 0.7 1.  0.9 0.8 0.7 0.9 0.8]
 
 mean value: nan
 
@@ -21266,8 +15636,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_jcc 
-value: [0.66666667 0.         0.75       0.5               nan        nan
- 0.33333333 0.4        0.5        0.75      ]
+value: [0.8 0.8 nan 0.4 1.  0.8 0.6 0.4 0.8 0.6]
 
 mean value: nan
 
@@ -21276,12 +15645,13 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 
 mean value: 1.0
 
-MCC on Blind test: 0.07
+MCC on Blind test: 0.0
 
-Accuracy on Blind test: 0.52
+Accuracy on Blind test: 0.65
 
 Model_name: Ridge Classifier 
-Model func: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Model func: RidgeClassifier(random_state=42) 
+List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
 Traceback (most recent call last):
   File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
     return cache[method]
@@ -21329,56 +15699,7 @@ Traceback (most recent call last):
 ValueError: Found unknown categories ['XDR'] in column 5 during transform
 
   warnings.warn(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
-    return cache[method]
-KeyError: 'predict'
-
-During handling of the above exception, another exception occurred:
-
-Traceback (most recent call last):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
-    scores = scorer(estimator, X_test, y_test)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
-    score = scorer._score(cached_call, estimator, *args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
-    y_pred = method_caller(estimator, "predict", X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
-    result = getattr(estimator, method)(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
-    Xt = transform.transform(Xt)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
-    Xs = self._fit_transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
-    return Parallel(n_jobs=self.n_jobs)(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
-    while self.dispatch_one_batch(iterator):
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
-    self._dispatch(tasks)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
-    job = self._backend.apply_async(batch, callback=cb)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
-    result = ImmediateResult(func)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
-    self.results = batch()
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
-    return [func(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
-    return self.function(*args, **kwargs)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
-    res = transformer.transform(X)
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
-    X_int, X_mask = self._transform(
-  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
-    raise ValueError(msg)
-ValueError: Found unknown categories ['Other'] in column 5 during transform
-
-  warnings.warn(
-RidgeClassifier(random_state=42) 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
                        n_estimators=1000, n_jobs=10, oob_score=True,
                        random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
               colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
@@ -21407,64 +15728,66 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', RidgeClassifier(random_state=42))])
 
 key: fit_time 
-value: [0.0125978  0.01234961 0.01238513 0.01238847 0.01241469 0.01236916
- 0.02667212 0.01334715 0.01305318 0.0137136 ]
+value: [0.02705026 0.01304579 0.01301503 0.03505015 0.03305626 0.02976966
+ 0.03926349 0.02756858 0.0333178  0.03212309]
 
-mean value: 0.014129090309143066
+mean value: 0.028326010704040526
 
 key: score_time 
-value: [0.01133299 0.01127529 0.01127625 0.01128983 0.00598621 0.00598669
- 0.01129127 0.01196098 0.0119524  0.01206422]
+value: [0.01200557 0.01179862 0.00627947 0.02354193 0.02322149 0.02598286
+ 0.02078986 0.02019954 0.02314901 0.02163339]
 
-mean value: 0.01044161319732666
+mean value: 0.018860173225402833
 
 key: test_mcc 
-value: [0.33333333 1.         0.4472136  1.                nan        nan
- 0.70710678 1.         1.         1.        ]
+value: [0.40824829 0.65465367        nan 0.2        0.81649658 1.
+ 0.40824829 0.81649658 0.21821789 1.        ]
 
 mean value: nan
 
 key: train_mcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+value: [0.97801929 0.97801929 0.97801929 0.97801929 0.97801929 0.97801929
+ 0.97801929 0.97801929 1.         0.97801929]
 
-mean value: 1.0
+mean value: 0.9802173644592863
 
 key: test_accuracy 
-value: [0.66666667 1.         0.66666667 1.                nan        nan
- 0.83333333 1.         1.         1.        ]
+value: [0.7 0.8 nan 0.6 0.9 1.  0.7 0.9 0.6 1. ]
 
 mean value: nan
 
 key: train_accuracy 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+value: [0.98888889 0.98888889 0.98888889 0.98888889 0.98888889 0.98888889
+ 0.98888889 0.98888889 1.         0.98888889]
 
-mean value: 1.0
+mean value: 0.99
 
 key: test_fscore 
-value: [0.66666667 1.         0.75       1.                nan        nan
- 0.8        1.         1.         1.        ]
+value: [0.72727273 0.83333333        nan 0.6        0.90909091 1.
+ 0.66666667 0.90909091 0.66666667 1.        ]
 
 mean value: nan
 
 key: train_fscore 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+value: [0.98901099 0.98901099 0.98901099 0.98901099 0.98901099 0.98901099
+ 0.98901099 0.98901099 1.         0.98901099]
 
-mean value: 1.0
+mean value: 0.9901098901098901
 
 key: test_precision 
-value: [0.66666667 1.         0.6        1.                nan        nan
- 1.         1.         1.         1.        ]
+value: [0.66666667 0.71428571        nan 0.6        0.83333333 1.
+ 0.75       0.83333333 0.57142857 1.        ]
 
 mean value: nan
 
 key: train_precision 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+value: [0.97826087 0.97826087 0.97826087 0.97826087 0.97826087 0.97826087
+ 0.97826087 0.97826087 1.         0.97826087]
 
-mean value: 1.0
+mean value: 0.9804347826086957
 
 key: test_recall 
-value: [0.66666667 1.         1.         1.                nan        nan
- 0.66666667 1.         1.         1.        ]
+value: [0.8 1.  nan 0.6 1.  1.  0.6 1.  0.8 1. ]
 
 mean value: nan
 
@@ -21474,30 +15797,31 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_roc_auc 
-value: [0.66666667 1.         0.66666667 1.                nan        nan
- 0.83333333 1.         1.         1.        ]
+value: [0.7 0.8 nan 0.6 0.9 1.  0.7 0.9 0.6 1. ]
 
 mean value: nan
 
 key: train_roc_auc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+value: [0.98888889 0.98888889 0.98888889 0.98888889 0.98888889 0.98888889
+ 0.98888889 0.98888889 1.         0.98888889]
 
-mean value: 1.0
+mean value: 0.99
 
 key: test_jcc 
-value: [0.5        1.         0.6        1.                nan        nan
- 0.66666667 1.         1.         1.        ]
+value: [0.57142857 0.71428571        nan 0.42857143 0.83333333 1.
+ 0.5        0.83333333 0.5        1.        ]
 
 mean value: nan
 
 key: train_jcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+value: [0.97826087 0.97826087 0.97826087 0.97826087 0.97826087 0.97826087
+ 0.97826087 0.97826087 1.         0.97826087]
 
-mean value: 1.0
+mean value: 0.9804347826086957
 
-MCC on Blind test: 0.25
+MCC on Blind test: 0.37
 
-Accuracy on Blind test: 0.62
+Accuracy on Blind test: 0.72
 
 Model_name: Ridge ClassifierCV 
 Model func: RidgeClassifierCV(cv=10) 
@@ -21561,6 +15885,16 @@ Traceback (most recent call last):
 ValueError: Found unknown categories ['XDR'] in column 5 during transform
 
   warnings.warn(
+/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:156: SettingWithCopyWarning: 
+A value is trying to be set on a copy of a slice from a DataFrame
+
+See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
+  ros_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
+/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:159: SettingWithCopyWarning: 
+A value is trying to be set on a copy of a slice from a DataFrame
+
+See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
+  ros_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
 /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
 Traceback (most recent call last):
   File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
@@ -21609,16 +15943,54 @@ Traceback (most recent call last):
 ValueError: Found unknown categories ['Other'] in column 5 during transform
 
   warnings.warn(
-/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:168: SettingWithCopyWarning: 
-A value is trying to be set on a copy of a slice from a DataFrame
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
 
-See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
-  rus_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
-/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:171: SettingWithCopyWarning: 
-A value is trying to be set on a copy of a slice from a DataFrame
+During handling of the above exception, another exception occurred:
 
-See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
-  rus_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
 Pipeline(steps=[('prep',
                  ColumnTransformer(remainder='passthrough',
                                    transformers=[('num', MinMaxScaler(),
@@ -21636,64 +16008,66 @@ Pipeline(steps=[('prep',
                 ('model', RidgeClassifierCV(cv=10))])
 
 key: fit_time 
-value: [0.08033776 0.07908654 0.07868695 0.07865906 0.07928276 0.08904433
- 0.09892082 0.08086753 0.07872057 0.07906723]
+value: [0.11854625 0.19753456 0.18856549 0.10754013 0.16717076 0.19964838
+ 0.11414194 0.19587135 0.26366663 0.21352744]
 
-mean value: 0.08226735591888427
+mean value: 0.17662129402160645
 
 key: score_time 
-value: [0.01172042 0.01198864 0.01163435 0.01175618 0.00623584 0.00627875
- 0.01186275 0.01175117 0.01167297 0.01161075]
+value: [0.02023578 0.02330852 0.01283813 0.01211834 0.02970982 0.02259326
+ 0.01234365 0.02518892 0.02464271 0.02279258]
 
-mean value: 0.010651183128356934
+mean value: 0.020577168464660643
 
 key: test_mcc 
-value: [0.         1.         0.4472136  0.70710678        nan        nan
- 0.33333333 1.         1.         1.        ]
+value: [0.40824829 0.65465367        nan 0.2        0.81649658 0.65465367
+ 0.40824829 0.81649658 0.21821789 1.        ]
 
 mean value: nan
 
 key: train_mcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+value: [0.97801929 0.97801929 0.97801929 0.97801929 0.97801929 1.
+ 0.97801929 0.97801929 1.         0.97801929]
 
-mean value: 1.0
+mean value: 0.9824154350749212
 
 key: test_accuracy 
-value: [0.5        1.         0.66666667 0.83333333        nan        nan
- 0.66666667 1.         1.         1.        ]
+value: [0.7 0.8 nan 0.6 0.9 0.8 0.7 0.9 0.6 1. ]
 
 mean value: nan
 
 key: train_accuracy 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+value: [0.98888889 0.98888889 0.98888889 0.98888889 0.98888889 1.
+ 0.98888889 0.98888889 1.         0.98888889]
 
-mean value: 1.0
+mean value: 0.9911111111111112
 
 key: test_fscore 
-value: [0.57142857 1.         0.75       0.8               nan        nan
- 0.66666667 1.         1.         1.        ]
+value: [0.72727273 0.83333333        nan 0.6        0.90909091 0.83333333
+ 0.66666667 0.90909091 0.66666667 1.        ]
 
 mean value: nan
 
 key: train_fscore 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+value: [0.98901099 0.98901099 0.98901099 0.98901099 0.98901099 1.
+ 0.98901099 0.98901099 1.         0.98901099]
 
-mean value: 1.0
+mean value: 0.9912087912087912
 
 key: test_precision 
-value: [0.5        1.         0.6        1.                nan        nan
- 0.66666667 1.         1.         1.        ]
+value: [0.66666667 0.71428571        nan 0.6        0.83333333 0.71428571
+ 0.75       0.83333333 0.57142857 1.        ]
 
 mean value: nan
 
 key: train_precision 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+value: [0.97826087 0.97826087 0.97826087 0.97826087 0.97826087 1.
+ 0.97826087 0.97826087 1.         0.97826087]
 
-mean value: 1.0
+mean value: 0.9826086956521739
 
 key: test_recall 
-value: [0.66666667 1.         1.         0.66666667        nan        nan
- 0.66666667 1.         1.         1.        ]
+value: [0.8 1.  nan 0.6 1.  1.  0.6 1.  0.8 1. ]
 
 mean value: nan
 
@@ -21703,30 +16077,31 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_roc_auc 
-value: [0.5        1.         0.66666667 0.83333333        nan        nan
- 0.66666667 1.         1.         1.        ]
+value: [0.7 0.8 nan 0.6 0.9 0.8 0.7 0.9 0.6 1. ]
 
 mean value: nan
 
 key: train_roc_auc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+value: [0.98888889 0.98888889 0.98888889 0.98888889 0.98888889 1.
+ 0.98888889 0.98888889 1.         0.98888889]
 
-mean value: 1.0
+mean value: 0.991111111111111
 
 key: test_jcc 
-value: [0.4        1.         0.6        0.66666667        nan        nan
- 0.5        1.         1.         1.        ]
+value: [0.57142857 0.71428571        nan 0.42857143 0.83333333 0.71428571
+ 0.5        0.83333333 0.5        1.        ]
 
 mean value: nan
 
 key: train_jcc 
-value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+value: [0.97826087 0.97826087 0.97826087 0.97826087 0.97826087 1.
+ 0.97826087 0.97826087 1.         0.97826087]
 
-mean value: 1.0
+mean value: 0.9826086956521739
 
-MCC on Blind test: 0.25
+MCC on Blind test: 0.37
 
-Accuracy on Blind test: 0.62
+Accuracy on Blind test: 0.72
 
 Model_name: Logistic Regression 
 Model func: LogisticRegression(random_state=42) 
@@ -21759,613 +16134,520 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', LogisticRegression(random_state=42))])
 
 key: fit_time 
-value: [0.02765584 0.02437687 0.02343535 0.02426672 0.02423263 0.0224123
- 0.02587819 0.02598929 0.02278066 0.02541733]
+value: [0.02091384 0.02012491 0.02000952 0.02053618 0.02216005 0.02067947
+ 0.03914547 0.03124046 0.02153111 0.02034235]
 
-mean value: 0.02464451789855957
+mean value: 0.023668336868286132
 
 key: score_time 
-value: [0.01162863 0.01157403 0.01157665 0.01159024 0.01161504 0.01156235
- 0.01163483 0.0118084  0.011554   0.0116539 ]
+value: [0.01170325 0.01167917 0.01161551 0.01167727 0.01176381 0.00625944
+ 0.00652528 0.01202559 0.01162553 0.011724  ]
 
-mean value: 0.011619806289672852
+mean value: 0.010659885406494141
 
 key: test_mcc 
-value: [0.65465367 0.6        0.21821789 0.40824829 0.81649658 0.65465367
- 0.81649658 0.65465367 0.40824829 0.40824829]
+value: [ 0.33333333  0.70710678  0.         -0.70710678  0.                 nan
+         nan  1.          1.          0.61237244]
 
-mean value: 0.5639916935606966
+mean value: nan
 
 key: train_mcc 
-value: [0.91201231 0.93356387 0.84465303 0.97801929 0.93356387 0.91111111
- 0.91201231 0.91201231 0.93356387 0.97801929]
+value: [0.9258201  0.92307692 0.9258201  1.         1.         0.96225045
+ 0.9258201  0.96225045 0.96291111 0.96296296]
 
-mean value: 0.9248531267284181
+mean value: 0.9550912190805847
 
 key: test_accuracy 
-value: [0.8 0.8 0.6 0.7 0.9 0.8 0.9 0.8 0.7 0.7]
+value: [0.66666667 0.83333333 0.5        0.16666667 0.5               nan
+        nan 1.         1.         0.8       ]
 
-mean value: 0.77
+mean value: nan
 
 key: train_accuracy 
-value: [0.95555556 0.96666667 0.92222222 0.98888889 0.96666667 0.95555556
- 0.95555556 0.95555556 0.96666667 0.98888889]
+value: [0.96153846 0.96153846 0.96153846 1.         1.         0.98076923
+ 0.96153846 0.98076923 0.98113208 0.98113208]
 
-mean value: 0.9622222222222223
+mean value: 0.9769956458635704
 
 key: test_fscore 
-value: [0.83333333 0.8        0.5        0.72727273 0.90909091 0.75
- 0.90909091 0.83333333 0.66666667 0.66666667]
+value: [0.66666667 0.85714286 0.66666667 0.28571429 0.4               nan
+        nan 1.         1.         0.85714286]
 
-mean value: 0.7595454545454545
+mean value: nan
 
 key: train_fscore 
-value: [0.95652174 0.96703297 0.92134831 0.98901099 0.96703297 0.95555556
- 0.95652174 0.95652174 0.96703297 0.98901099]
+value: [0.96       0.96153846 0.96296296 1.         1.         0.98113208
+ 0.96296296 0.98113208 0.98181818 0.98113208]
 
-mean value: 0.962558996667448
+mean value: 0.9772678795697664
 
 key: test_precision 
-value: [0.71428571 0.8        0.66666667 0.66666667 0.83333333 1.
- 0.83333333 0.71428571 0.75       0.75      ]
+value: [0.66666667 0.75       0.5        0.25       0.5               nan
+        nan 1.         1.         0.75      ]
 
-mean value: 0.7728571428571429
+mean value: nan
 
 key: train_precision 
-value: [0.93617021 0.95652174 0.93181818 0.97826087 0.95652174 0.95555556
- 0.93617021 0.93617021 0.95652174 0.97826087]
+value: [1.         0.96153846 0.92857143 1.         1.         0.96296296
+ 0.92857143 0.96296296 0.96428571 0.96296296]
 
-mean value: 0.9521971332193349
+mean value: 0.9671855921855922
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
 
 key: test_recall 
-value: [1.  0.8 0.4 0.8 1.  0.6 1.  1.  0.6 0.6]
+value: [0.66666667 1.         1.         0.33333333 0.33333333        nan
+        nan 1.         1.         1.        ]
 
-mean value: 0.78
+mean value: nan
 
 key: train_recall 
-value: [0.97777778 0.97777778 0.91111111 1.         0.97777778 0.95555556
- 0.97777778 0.97777778 0.97777778 1.        ]
+value: [0.92307692 0.96153846 1.         1.         1.         1.
+ 1.         1.         1.         1.        ]
 
-mean value: 0.9733333333333333
+mean value: 0.9884615384615385
 
 key: test_roc_auc 
-value: [0.8 0.8 0.6 0.7 0.9 0.8 0.9 0.8 0.7 0.7]
+value: [0.66666667 0.83333333 0.5        0.16666667 0.5               nan
+        nan 1.         1.         0.75      ]
 
-mean value: 0.77
+mean value: nan
 
-key: train_roc_auc /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+key: train_roc_auc 
+value: [0.96153846 0.96153846 0.96153846 1.         1.         0.98076923
+ 0.96153846 0.98076923 0.98076923 0.98148148]
 
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
-STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
-
-Increase the number of iterations (max_iter) or scale the data as shown in:
-    https://scikit-learn.org/stable/modules/preprocessing.html
-Please also refer to the documentation for alternative solver options:
-    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
-  n_iter_i = _check_optimize_result(
-
-value: [0.95555556 0.96666667 0.92222222 0.98888889 0.96666667 0.95555556
- 0.95555556 0.95555556 0.96666667 0.98888889]
-
-mean value: 0.9622222222222223
+mean value: 0.976994301994302
 
 key: test_jcc 
-value: [0.71428571 0.66666667 0.33333333 0.57142857 0.83333333 0.6
- 0.83333333 0.71428571 0.5        0.5       ]
+value: [0.5        0.75       0.5        0.16666667 0.25              nan
+        nan 1.         1.         0.75      ]
 
-mean value: 0.6266666666666667
+mean value: nan
 
 key: train_jcc 
-value: [0.91666667 0.93617021 0.85416667 0.97826087 0.93617021 0.91489362
- 0.91666667 0.91666667 0.93617021 0.97826087]
+value: [0.92307692 0.92592593 0.92857143 1.         1.         0.96296296
+ 0.92857143 0.96296296 0.96428571 0.96296296]
 
-mean value: 0.928409266111625
+mean value: 0.9559320309320309
 
-MCC on Blind test: 0.37
+MCC on Blind test: 0.21
 
-Accuracy on Blind test: 0.72
+Accuracy on Blind test: 0.65
 
 Model_name: Logistic RegressionCV 
 Model func: LogisticRegressionCV(random_state=42) 
@@ -22398,105 +16680,202 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', LogisticRegressionCV(random_state=42))])
 
 key: fit_time 
-value: [0.57545948 0.71273303 0.62566566 0.53029251 0.58704925 0.67743158
- 0.58735538 0.63677311 0.5939045  0.59990978]
+value: [0.32553601 0.34272623 0.32583499 0.36345124 0.3234396  0.33710957
+ 0.35224915 0.35124803 0.39501452 0.36207604]
 
-mean value: 0.6126574277877808
+mean value: 0.347868537902832
 
 key: score_time 
-value: [0.01562166 0.01615906 0.01187754 0.01739883 0.01621699 0.01188135
- 0.01306725 0.01308417 0.01306224 0.01307702]
+value: [0.01237249 0.01205635 0.01210785 0.01214385 0.01202536 0.00631595
+ 0.00638008 0.01196527 0.01431847 0.01206231]
 
-mean value: 0.014144611358642579
+mean value: 0.011174798011779785
 
 key: test_mcc 
-value: [0.65465367 0.81649658 0.81649658 0.40824829 0.6        0.81649658
- 0.65465367 0.81649658 0.81649658 0.81649658]
+value: [0.         0.70710678 0.         0.         0.                nan
+        nan 1.         1.         0.61237244]
 
-mean value: 0.7216535117446173
+mean value: nan
 
 key: train_mcc 
-value: [1.         1.         0.95555556 1.         1.         0.95555556
+value: [0.88527041 1.         0.84866842 1.         1.         0.88527041
  1.         1.         1.         1.        ]
 
-mean value: 0.9911111111111112
+mean value: 0.9619209250306358
 
 key: test_accuracy 
-value: [0.8 0.9 0.9 0.7 0.8 0.9 0.8 0.9 0.9 0.9]
+value: [0.5        0.83333333 0.5        0.5        0.5               nan
+        nan 1.         1.         0.8       ]
 
-mean value: 0.85
+mean value: nan
 
 key: train_accuracy 
-value: [1.         1.         0.97777778 1.         1.         0.97777778
+value: [0.94230769 1.         0.92307692 1.         1.         0.94230769
  1.         1.         1.         1.        ]
 
-mean value: 0.9955555555555555
+mean value: 0.9807692307692307
 
 key: test_fscore 
-value: [0.83333333 0.90909091 0.90909091 0.72727273 0.8        0.88888889
- 0.83333333 0.90909091 0.88888889 0.90909091]
+value: [0.4        0.85714286 0.66666667 0.57142857 0.4               nan
+        nan 1.         1.         0.85714286]
 
-mean value: 0.8608080808080808
+mean value: nan
 
 key: train_fscore 
-value: [1.         1.         0.97777778 1.         1.         0.97777778
+value: [0.94117647 1.         0.92592593 1.         1.         0.94339623
  1.         1.         1.         1.        ]
 
-mean value: 0.9955555555555555
+mean value: 0.9810498622929256
 
 key: test_precision 
-value: [0.71428571 0.83333333 0.83333333 0.66666667 0.8        1.
- 0.71428571 0.83333333 1.         0.83333333]
+value: [0.5  0.75 0.5  0.5  0.5   nan  nan 1.   1.   0.75]
 
-mean value: 0.8228571428571428
+mean value: nan
 
 key: train_precision 
-value: [1.         1.         0.97777778 1.         1.         0.97777778
+value: [0.96       1.         0.89285714 1.         1.         0.92592593
  1.         1.         1.         1.        ]
 
-mean value: 0.9955555555555555
+mean value: 0.9778783068783069
 
 key: test_recall 
-value: [1.  1.  1.  0.8 0.8 0.8 1.  1.  0.8 1. ]
+value: [0.33333333 1.         1.         0.66666667 0.33333333        nan
+        nan 1.         1.         1.        ]
 
-mean value: 0.92
+mean value: nan
 
 key: train_recall 
-value: [1.         1.         0.97777778 1.         1.         0.97777778
+value: [0.92307692 1.         0.96153846 1.         1.         0.96153846
  1.         1.         1.         1.        ]
 
-mean value: 0.9955555555555555
+mean value: 0.9846153846153847
 
 key: test_roc_auc 
-value: [0.8 0.9 0.9 0.7 0.8 0.9 0.8 0.9 0.9 0.9]
+value: [0.5        0.83333333 0.5        0.5        0.5               nan
+        nan 1.         1.         0.75      ]
 
-mean value: 0.8500000000000001
+mean value: nan
 
 key: train_roc_auc 
-value: [1.         1.         0.97777778 1.         1.         0.97777778
+value: [0.94230769 1.         0.92307692 1.         1.         0.94230769
  1.         1.         1.         1.        ]
 
-mean value: 0.9955555555555555
+mean value: 0.9807692307692308
 
 key: test_jcc 
-value: [0.71428571 0.83333333 0.83333333 0.57142857 0.66666667 0.8
- 0.71428571 0.83333333 0.8        0.83333333]
+value: [0.25 0.75 0.5  0.4  0.25  nan  nan 1.   1.   0.75]
 
-mean value: 0.76
+mean value: nan
 
 key: train_jcc 
-value: [1.         1.         0.95652174 1.         1.         0.95652174
+value: [0.88888889 1.         0.86206897 1.         1.         0.89285714
  1.         1.         1.         1.        ]
 
-mean value: 0.991304347826087
+mean value: 0.9643814997263274
 
-MCC on Blind test: 0.36
+MCC on Blind test: 0.21
 
-Accuracy on Blind test: 0.72
+Accuracy on Blind test: 0.65
 
 Model_name: Gaussian NB 
 Model func: GaussianNB() 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
+[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
                        n_estimators=1000, n_jobs=10, oob_score=True,
                        random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
               colsample_bynode=None, colsample_bytree=None,
@@ -22525,99 +16904,102 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', GaussianNB())])
 
 key: fit_time 
-value: [0.01235056 0.01039577 0.00935602 0.00850844 0.00858593 0.00864339
- 0.00865698 0.00895977 0.0085566  0.00893736]
+value: [0.01288819 0.01199651 0.01104617 0.00844717 0.00849009 0.00838327
+ 0.00898004 0.00881791 0.00832319 0.0085094 ]
 
-mean value: 0.009295082092285157
+mean value: 0.009588193893432618
 
 key: score_time 
-value: [0.01156569 0.00890326 0.00879598 0.00853562 0.00855756 0.00884724
- 0.0086143  0.00869632 0.0087235  0.00869799]
+value: [0.01248598 0.01183987 0.00980401 0.00890875 0.0086236  0.00427198
+ 0.0042758  0.00965548 0.00923252 0.00854301]
 
-mean value: 0.008993744850158691
+mean value: 0.008764100074768067
 
 key: test_mcc 
-value: [ 0.          0.5         0.21821789  0.21821789 -0.21821789  0.21821789
-  0.2         0.5        -0.5         0.        ]
+value: [0.         0.         0.4472136  0.         0.                nan
+        nan 0.33333333 0.61237244 0.66666667]
 
-mean value: 0.11364357804719848
+mean value: nan
 
 key: train_mcc 
-value: [0.56568542 0.64168895 0.57642872 0.53452248 0.48420012 0.58456547
- 0.69509522 0.77854709 0.53031442 0.8675239 ]
+value: [0.70064905 0.65824263 0.81312325 0.66666667 0.76923077 0.5990423
+ 0.58333333 0.71151247 0.73357097 0.70527596]
 
-mean value: 0.6258571808190334
+mean value: 0.6940647388614967
 
 key: test_accuracy 
-value: [0.5 0.7 0.6 0.6 0.4 0.6 0.6 0.7 0.3 0.5]
+value: [0.5        0.5        0.66666667 0.5        0.5               nan
+        nan 0.66666667 0.8        0.8       ]
 
-mean value: 0.5499999999999999
+mean value: nan
 
 key: train_accuracy 
-value: [0.76666667 0.81111111 0.77777778 0.72222222 0.73333333 0.77777778
- 0.84444444 0.88888889 0.75555556 0.93333333]
+value: [0.84615385 0.82692308 0.90384615 0.80769231 0.88461538 0.78846154
+ 0.76923077 0.84615385 0.8490566  0.8490566 ]
 
-mean value: 0.8011111111111111
+mean value: 0.8371190130624093
 
 key: test_fscore 
-value: [0.61538462 0.76923077 0.66666667 0.66666667 0.5        0.66666667
- 0.6        0.76923077 0.46153846 0.54545455]
+value: [0.4        0.57142857 0.75       0.57142857 0.57142857        nan
+        nan 0.66666667 0.66666667 0.8       ]
 
-mean value: 0.6260839160839161
+mean value: nan
 
 key: train_fscore 
-value: [0.8        0.83168317 0.80392157 0.7826087  0.76470588 0.80769231
- 0.83333333 0.88636364 0.78431373 0.93478261]
+value: [0.83333333 0.81632653 0.89795918 0.76190476 0.88461538 0.81355932
+ 0.71428571 0.82608696 0.82608696 0.83333333]
 
-mean value: 0.8229404926524524
+mean value: 0.8207491476835619
 
 key: test_precision 
-value: [0.5        0.625      0.57142857 0.57142857 0.42857143 0.57142857
- 0.6        0.625      0.375      0.5       ]
+value: [0.5        0.5        0.6        0.5        0.5               nan
+        nan 0.66666667 1.         1.        ]
 
-mean value: 0.5367857142857143
+mean value: nan
 
 key: train_precision 
-value: [0.7        0.75       0.71929825 0.64285714 0.68421053 0.71186441
- 0.8974359  0.90697674 0.70175439 0.91489362]
+value: [0.90909091 0.86956522 0.95652174 1.         0.88461538 0.72727273
+ 0.9375     0.95       1.         0.90909091]
 
-mean value: 0.7629290966174761
+mean value: 0.9143656886591669
 
 key: test_recall 
-value: [0.8 1.  0.8 0.8 0.6 0.8 0.6 1.  0.6 0.6]
+value: [0.33333333 0.66666667 1.         0.66666667 0.66666667        nan
+        nan 0.66666667 0.5        0.66666667]
 
-mean value: 0.76
+mean value: nan
 
 key: train_recall 
-value: [0.93333333 0.93333333 0.91111111 1.         0.86666667 0.93333333
- 0.77777778 0.86666667 0.88888889 0.95555556]
+value: [0.76923077 0.76923077 0.84615385 0.61538462 0.88461538 0.92307692
+ 0.57692308 0.73076923 0.7037037  0.76923077]
 
-mean value: 0.9066666666666667
+mean value: 0.7588319088319089
 
 key: test_roc_auc 
-value: [0.5 0.7 0.6 0.6 0.4 0.6 0.6 0.7 0.3 0.5]
+value: [0.5        0.5        0.66666667 0.5        0.5               nan
+        nan 0.66666667 0.75       0.83333333]
 
-mean value: 0.55
+mean value: nan
 
 key: train_roc_auc 
-value: [0.76666667 0.81111111 0.77777778 0.72222222 0.73333333 0.77777778
- 0.84444444 0.88888889 0.75555556 0.93333333]
+value: [0.84615385 0.82692308 0.90384615 0.80769231 0.88461538 0.78846154
+ 0.76923077 0.84615385 0.85185185 0.84757835]
 
-mean value: 0.8011111111111111
+mean value: 0.8372507122507122
 
 key: test_jcc 
-value: [0.44444444 0.625      0.5        0.5        0.33333333 0.5
- 0.42857143 0.625      0.3        0.375     ]
+value: [0.25       0.4        0.6        0.4        0.4               nan
+        nan 0.5        0.5        0.66666667]
 
-mean value: 0.4631349206349206
+mean value: nan
 
 key: train_jcc 
-value: [0.66666667 0.71186441 0.67213115 0.64285714 0.61904762 0.67741935
- 0.71428571 0.79591837 0.64516129 0.87755102]
+value: [0.71428571 0.68965517 0.81481481 0.61538462 0.79310345 0.68571429
+ 0.55555556 0.7037037  0.7037037  0.71428571]
 
-mean value: 0.7022902730094179
+mean value: 0.6990206728137762
 
-MCC on Blind test: 0.31
+MCC on Blind test: 0.23
 
 Accuracy on Blind test: 0.65
 
@@ -22635,7 +17017,199 @@ List of models: [('Logistic Regression', LogisticRegression(random_state=42)), (
               predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
               scale_pos_weight=None, subsample=None, tree_method=None,
               use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: Pipeline(steps=[('prep',
+Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
+Pipeline(steps=[('prep',
                  ColumnTransformer(remainder='passthrough',
                                    transformers=[('num', MinMaxScaler(),
                                                   Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
@@ -22652,101 +17226,104 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', BernoulliNB())])
 
 key: fit_time 
-value: [0.00903225 0.00919151 0.00904417 0.00956297 0.00893402 0.00883603
- 0.00865221 0.00857472 0.00858474 0.00852299]
+value: [0.00965214 0.00892282 0.00858331 0.00853586 0.0085423  0.00859547
+ 0.0098629  0.00870371 0.00837135 0.00841379]
 
-mean value: 0.008893561363220216
+mean value: 0.008818364143371582
 
 key: score_time 
-value: [0.00895262 0.00891352 0.00872064 0.00939298 0.00859714 0.00868654
- 0.00841737 0.00842023 0.00843048 0.00861788]
+value: [0.00977111 0.00953913 0.00881982 0.00860023 0.00865912 0.00424123
+ 0.00450349 0.00848484 0.008569   0.00859237]
 
-mean value: 0.008714938163757324
+mean value: 0.007978034019470216
 
 key: test_mcc 
-value: [0.40824829 0.6        0.         0.21821789 0.40824829 0.65465367
- 0.6        0.65465367 0.5        0.        ]
+value: [ 0.4472136  -0.33333333  0.         -0.33333333  0.                 nan
+         nan -0.33333333  0.66666667  0.16666667]
 
-mean value: 0.4044021812579673
+mean value: nan
 
 key: train_mcc 
-value: [0.68888889 0.58137767 0.60540551 0.66683134 0.64700558 0.55776344
- 0.69162666 0.58137767 0.71269665 0.70004007]
+value: [0.77151675 0.63245553 0.84615385 0.82305489 0.80829038 0.77849894
+ 0.65433031 0.74466871 0.74106548 0.73609205]
 
-mean value: 0.6433013479547345
+mean value: 0.7536126885007749
 
 key: test_accuracy 
-value: [0.7 0.8 0.5 0.6 0.7 0.8 0.8 0.8 0.7 0.5]
+value: [0.66666667 0.33333333 0.5        0.33333333 0.5               nan
+        nan 0.33333333 0.8        0.6       ]
 
-mean value: 0.69
+mean value: nan
 
 key: train_accuracy 
-value: [0.84444444 0.78888889 0.8        0.83333333 0.82222222 0.77777778
- 0.84444444 0.78888889 0.85555556 0.84444444]
+value: [0.88461538 0.80769231 0.92307692 0.90384615 0.90384615 0.88461538
+ 0.82692308 0.86538462 0.86792453 0.86792453]
 
-mean value: 0.82
+mean value: 0.8735849056603774
 
 key: test_fscore 
-value: [0.72727273 0.8        0.44444444 0.66666667 0.66666667 0.75
- 0.8        0.83333333 0.57142857 0.28571429]
+value: [0.5        0.33333333 0.66666667 0.33333333 0.4               nan
+        nan 0.33333333 0.8        0.66666667]
 
-mean value: 0.6545526695526696
+mean value: nan
 
 key: train_fscore 
-value: [0.84444444 0.77647059 0.78571429 0.83146067 0.81395349 0.76744186
- 0.8372093  0.77647059 0.85057471 0.82926829]
+value: [0.88       0.7826087  0.92307692 0.89361702 0.90196078 0.875
+ 0.83018868 0.85106383 0.8627451  0.8627451 ]
 
-mean value: 0.8113008237276017
+mean value: 0.8663006129430366
 
 key: test_precision 
-value: [0.66666667 0.8        0.5        0.57142857 0.75       1.
- 0.8        0.71428571 1.         0.5       ]
+value: [1.         0.33333333 0.5        0.33333333 0.5               nan
+        nan 0.33333333 0.66666667 0.66666667]
 
-mean value: 0.7302380952380952
+mean value: nan
 
 key: train_precision 
-value: [0.84444444 0.825      0.84615385 0.84090909 0.85365854 0.80487805
- 0.87804878 0.825      0.88095238 0.91891892]
+value: [0.91666667 0.9        0.92307692 1.         0.92       0.95454545
+ 0.81481481 0.95238095 0.91666667 0.88      ]
 
-mean value: 0.851796404723234
+mean value: 0.9178151478151478
 
 key: test_recall 
-value: [0.8 0.8 0.4 0.8 0.6 0.6 0.8 1.  0.4 0.2]
+value: [0.33333333 0.33333333 1.         0.33333333 0.33333333        nan
+        nan 0.33333333 1.         0.66666667]
 
-mean value: 0.64
+mean value: nan
 
 key: train_recall 
-value: [0.84444444 0.73333333 0.73333333 0.82222222 0.77777778 0.73333333
- 0.8        0.73333333 0.82222222 0.75555556]
+value: [0.84615385 0.69230769 0.92307692 0.80769231 0.88461538 0.80769231
+ 0.84615385 0.76923077 0.81481481 0.84615385]
 
-mean value: 0.7755555555555556
+mean value: 0.8237891737891738
 
 key: test_roc_auc 
-value: [0.7 0.8 0.5 0.6 0.7 0.8 0.8 0.8 0.7 0.5]
+value: [0.66666667 0.33333333 0.5        0.33333333 0.5               nan
+        nan 0.33333333 0.83333333 0.58333333]
 
-mean value: 0.6900000000000001
+mean value: nan
 
 key: train_roc_auc 
-value: [0.84444444 0.78888889 0.8        0.83333333 0.82222222 0.77777778
- 0.84444444 0.78888889 0.85555556 0.84444444]
+value: [0.88461538 0.80769231 0.92307692 0.90384615 0.90384615 0.88461538
+ 0.82692308 0.86538462 0.86894587 0.86752137]
 
-mean value: 0.82
+mean value: 0.8736467236467237
 
 key: test_jcc 
-value: [0.57142857 0.66666667 0.28571429 0.5        0.5        0.6
- 0.66666667 0.71428571 0.4        0.16666667]
+value: [0.33333333 0.2        0.5        0.2        0.25              nan
+        nan 0.2        0.66666667 0.5       ]
 
-mean value: 0.5071428571428571
+mean value: nan
 
 key: train_jcc 
-value: [0.73076923 0.63461538 0.64705882 0.71153846 0.68627451 0.62264151
- 0.72       0.63461538 0.74       0.70833333]
+value: [0.78571429 0.64285714 0.85714286 0.80769231 0.82142857 0.77777778
+ 0.70967742 0.74074074 0.75862069 0.75862069]
 
-mean value: 0.683584663763909
+mean value: 0.7660272482018867
 
-MCC on Blind test: 0.12
+MCC on Blind test: -0.03
 
-Accuracy on Blind test: 0.6
+Accuracy on Blind test: 0.5
 
 Model_name: K-Nearest Neighbors 
 Model func: KNeighborsClassifier() 
@@ -22779,101 +17356,200 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', KNeighborsClassifier())])
 
 key: fit_time 
-value: [0.0083065  0.00893617 0.0082252  0.00910974 0.00897431 0.00820327
- 0.00906706 0.00823522 0.00894833 0.00931168]
+value: [0.00931239 0.0100131  0.00874043 0.00838232 0.009269   0.00925255
+ 0.00977898 0.00942922 0.00934243 0.00922894]
 
-mean value: 0.008731746673583984
+mean value: 0.009274935722351075
 
 key: score_time 
-value: [0.00974131 0.00983167 0.00941205 0.00987244 0.00959492 0.00921226
- 0.00989819 0.01411557 0.01460791 0.01599026]
+value: [0.01010799 0.00981784 0.00952053 0.00997114 0.0101397  0.00483942
+ 0.00481582 0.01042175 0.01021457 0.01005983]
 
-mean value: 0.011227655410766601
+mean value: 0.008990859985351563
 
 key: test_mcc 
-value: [ 0.33333333  0.40824829 -0.21821789  0.          0.2         0.40824829
-  0.40824829  0.          0.33333333  0.        ]
+value: [ 0.          0.          0.         -0.70710678 -0.33333333         nan
+         nan  0.70710678  0.66666667  0.16666667]
 
-mean value: 0.18731936478222633
+mean value: nan
 
 key: train_mcc 
-value: [0.60540551 0.57906602 0.53452248 0.69162666 0.68888889 0.57906602
- 0.51571581 0.53452248 0.46712826 0.51314236]
+value: [0.54494926 0.4259217  0.54006172 0.62279916 0.66628253 0.58080232
+ 0.54006172 0.65433031 0.28612567 0.58766552]
 
-mean value: 0.5709084504401615
+mean value: 0.5448999906190637
 
 key: test_accuracy 
-value: [0.6 0.7 0.4 0.5 0.6 0.7 0.7 0.5 0.6 0.5]
+value: [0.5        0.5        0.5        0.16666667 0.33333333        nan
+        nan 0.83333333 0.8        0.6       ]
 
-mean value: 0.58
+mean value: nan
 
 key: train_accuracy 
-value: [0.8        0.78888889 0.76666667 0.84444444 0.84444444 0.78888889
- 0.75555556 0.76666667 0.73333333 0.75555556]
+value: [0.76923077 0.71153846 0.76923077 0.80769231 0.82692308 0.78846154
+ 0.76923077 0.82692308 0.64150943 0.79245283]
 
-mean value: 0.7844444444444445
+mean value: 0.7703193033381712
 
 key: test_fscore 
-value: [0.71428571 0.72727273 0.25       0.61538462 0.6        0.66666667
- 0.72727273 0.54545455 0.33333333 0.28571429]
+value: [0.4        0.4        0.66666667 0.         0.33333333        nan
+        nan 0.85714286 0.8        0.66666667]
 
-mean value: 0.5465384615384615
+mean value: nan
 
 key: train_fscore 
-value: [0.8125     0.79569892 0.75862069 0.85106383 0.84444444 0.79569892
- 0.73809524 0.75862069 0.72727273 0.76595745]
+value: [0.75       0.69387755 0.76       0.79166667 0.80851064 0.7755102
+ 0.77777778 0.82352941 0.62745098 0.7755102 ]
 
-mean value: 0.7847972915180865
+mean value: 0.7583833434082853
 
-key: test_precision 
-value: [0.55555556 0.66666667 0.33333333 0.5        0.6        0.75
- 0.66666667 0.5        1.         0.5       ]
+key: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
 
-mean value: 0.6072222222222222
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
+test_precision 
+value: [0.5        0.5        0.5        0.         0.33333333        nan
+        nan 0.75       0.66666667 0.66666667]
+
+mean value: nan
 
 key: train_precision 
-value: [0.76470588 0.77083333 0.78571429 0.81632653 0.84444444 0.77083333
- 0.79487179 0.78571429 0.74418605 0.73469388]
+value: [0.81818182 0.73913043 0.79166667 0.86363636 0.9047619  0.82608696
+ 0.75       0.84       0.66666667 0.82608696]
 
-mean value: 0.7812323814439311
+mean value: 0.8026217767739506
 
 key: test_recall 
-value: [1.  0.8 0.2 0.8 0.6 0.6 0.8 0.6 0.2 0.2]
+value: [0.33333333 0.33333333 1.         0.         0.33333333        nan
+        nan 1.         1.         0.66666667]
 
-mean value: 0.58
+mean value: nan
 
 key: train_recall 
-value: [0.86666667 0.82222222 0.73333333 0.88888889 0.84444444 0.82222222
- 0.68888889 0.73333333 0.71111111 0.8       ]
+value: [0.69230769 0.65384615 0.73076923 0.73076923 0.73076923 0.73076923
+ 0.80769231 0.80769231 0.59259259 0.73076923]
 
-mean value: 0.7911111111111111
+mean value: 0.7207977207977208
 
 key: test_roc_auc 
-value: [0.6 0.7 0.4 0.5 0.6 0.7 0.7 0.5 0.6 0.5]
+value: [0.5        0.5        0.5        0.16666667 0.33333333        nan
+        nan 0.83333333 0.83333333 0.58333333]
 
-mean value: 0.5800000000000001
+mean value: nan
 
 key: train_roc_auc 
-value: [0.8        0.78888889 0.76666667 0.84444444 0.84444444 0.78888889
- 0.75555556 0.76666667 0.73333333 0.75555556]
+value: [0.76923077 0.71153846 0.76923077 0.80769231 0.82692308 0.78846154
+ 0.76923077 0.82692308 0.64245014 0.79131054]
 
-mean value: 0.7844444444444445
+mean value: 0.7702991452991453
 
 key: test_jcc 
-value: [0.55555556 0.57142857 0.14285714 0.44444444 0.42857143 0.5
- 0.57142857 0.375      0.2        0.16666667]
+value: [0.25       0.25       0.5        0.         0.2               nan
+        nan 0.75       0.66666667 0.5       ]
 
-mean value: 0.3955952380952381
+mean value: nan
 
 key: train_jcc 
-value: [0.68421053 0.66071429 0.61111111 0.74074074 0.73076923 0.66071429
- 0.58490566 0.61111111 0.57142857 0.62068966]
+value: [0.6        0.53125    0.61290323 0.65517241 0.67857143 0.63333333
+ 0.63636364 0.7        0.45714286 0.63333333]
 
-mean value: 0.6476395178454898
+mean value: 0.6138070228344144
 
-MCC on Blind test: 0.08
+MCC on Blind test: 0.32
 
-Accuracy on Blind test: 0.57
+Accuracy on Blind test: 0.68
 
 Model_name: SVM 
 Model func: SVC(random_state=42) 
@@ -22906,101 +17582,5630 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', SVC(random_state=42))])
 
 key: fit_time 
-value: [0.01041508 0.0102284  0.00914741 0.00940919 0.00931716 0.00927925
- 0.00927663 0.00954556 0.00942039 0.0091517 ]
+value: [0.00890541 0.00955153 0.00889421 0.00859499 0.00959396 0.00865912
+ 0.01014996 0.00967169 0.01003861 0.01146197]
 
-mean value: 0.009519076347351075
+mean value: 0.00955214500427246
 
 key: score_time 
-value: [0.00887251 0.00886989 0.00857925 0.0089817  0.00945759 0.00883317
- 0.00878358 0.00884509 0.00862408 0.00881624]
+value: [0.00898552 0.00915551 0.00873256 0.00884151 0.00935507 0.00436473
+ 0.00465631 0.0094142  0.00948119 0.01040006]
 
-mean value: 0.008866310119628906
+mean value: 0.008338665962219239
 
 key: test_mcc 
-value: [0.81649658 0.6        0.21821789 0.21821789 0.6        0.65465367
- 0.40824829 0.65465367 0.2        0.5       ]
+value: [-0.4472136  -0.33333333  0.         -0.70710678  0.                 nan
+         nan  1.          1.          0.66666667]
 
-mean value: 0.4870487993279528
+mean value: nan
 
 key: train_mcc 
-value: [0.8675239  0.86666667 0.85485041 0.86666667 0.84632727 0.85485041
- 0.79036782 0.84632727 0.82548988 0.81649658]
+value: [0.89056356 0.84866842 0.92307692 0.89056356 0.96225045 0.9258201
+ 0.84866842 0.96225045 0.92724773 0.88730475]
 
-mean value: 0.8435566879416903
+mean value: 0.9066414371100951
 
 key: test_accuracy 
-value: [0.9 0.8 0.6 0.6 0.8 0.8 0.7 0.8 0.6 0.7]
+value: [0.33333333 0.33333333 0.5        0.16666667 0.5               nan
+        nan 1.         1.         0.8       ]
 
-mean value: 0.73
+mean value: nan
 
 key: train_accuracy 
-value: [0.93333333 0.93333333 0.92222222 0.93333333 0.92222222 0.92222222
- 0.88888889 0.92222222 0.91111111 0.9       ]
+value: [0.94230769 0.92307692 0.96153846 0.94230769 0.98076923 0.96153846
+ 0.92307692 0.98076923 0.96226415 0.94339623]
 
-mean value: 0.9188888888888889
+mean value: 0.9521044992743106
 
 key: test_fscore 
-value: [0.90909091 0.8        0.5        0.66666667 0.8        0.75
- 0.66666667 0.83333333 0.6        0.57142857]
+value: [0.         0.33333333 0.66666667 0.28571429 0.4               nan
+        nan 1.         1.         0.8       ]
 
-mean value: 0.7097186147186147
+mean value: nan
 
 key: train_fscore 
-value: [0.93181818 0.93333333 0.91566265 0.93333333 0.91954023 0.91566265
- 0.87804878 0.91954023 0.90697674 0.88888889]
+value: [0.93877551 0.92       0.96153846 0.93877551 0.98113208 0.96
+ 0.92592593 0.98039216 0.96153846 0.94117647]
 
-mean value: 0.9142805023022523
+mean value: 0.9509254572333691
 
 key: test_precision 
-value: [0.83333333 0.8        0.66666667 0.57142857 0.8        1.
- 0.75       0.71428571 0.6        1.        ]
+value: [0.         0.33333333 0.5        0.25       0.5               nan
+        nan 1.         1.         1.        ]
 
-mean value: 0.7735714285714286
+mean value: nan
 
 key: train_precision 
-value: [0.95348837 0.93333333 1.         0.93333333 0.95238095 1.
- 0.97297297 0.95238095 0.95121951 1.        ]
+value: [1.         0.95833333 0.96153846 1.         0.96296296 1.
+ 0.89285714 1.         1.         0.96      ]
 
-mean value: 0.964910942868969
+mean value: 0.9735691900691901
 
 key: test_recall 
-value: [1.  0.8 0.4 0.8 0.8 0.6 0.6 1.  0.6 0.4]
+value: [0.         0.33333333 1.         0.33333333 0.33333333        nan
+        nan 1.         1.         0.66666667]
+
+mean value: nan
+
+key: train_recall 
+value: [0.88461538 0.88461538 0.96153846 0.88461538 1.         0.92307692
+ 0.96153846 0.96153846 0.92592593 0.92307692]
+
+mean value: 0.931054131054131
+
+key: test_roc_auc 
+value: [0.33333333 0.33333333 0.5        0.16666667 0.5               nan
+        nan 1.         1.         0.83333333]
+
+mean value: nan
+
+key: train_roc_auc 
+value: [0.94230769 0.92307692 0.96153846 0.94230769 0.98076923 0.96153846
+ 0.92307692 0.98076923 0.96296296 0.94301994]
+
+mean value: 0.9521367521367522
+
+key: test_jcc 
+value: [0.         0.2        0.5        0.16666667 0.25              nan
+        nan 1.         1.         0.66666667]
+
+mean value: nan
+
+key: train_jcc 
+value: [0.88461538 0.85185185 0.92592593 0.88461538 0.96296296 0.92307692
+ 0.86206897 0.96153846 0.92592593 0.88888889]
+
+mean value: 0.9071470674918951
+
+MCC on Blind test: 0.11
+
+Accuracy on Blind test: 0.57
+
+Model_name: MLP 
+Model func: MLPClassifier(max_iter=500, random_state=42) 
+List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
+[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
+              colsample_bynode=None, colsample_bytree=None,
+              enable_categorical=False, gamma=None, gpu_id=None,
+              importance_type=None, interaction_constraints=None,
+              learning_rate=None, max_delta_step=None, max_depth=None,
+              min_child_weight=None, missing=nan, monotone_constraints=None,
+              n_estimators=100, n_jobs=None, num_parallel_tree=None,
+              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
+              scale_pos_weight=None, subsample=None, tree_method=None,
+              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model', MLPClassifier(max_iter=500, random_state=42))])
+
+key: fit_time 
+value: [0.27838421 0.40681887 0.27178621 0.42920971 0.31241632 0.35591602
+ 0.32682848 0.27644706 0.28436017 0.2995646 ]
+
+mean value: 0.3241731643676758
+
+key: score_time 
+value: [0.01218176 0.01219702 0.01212978 0.01238561 0.01210904 0.00661492
+ 0.00656652 0.01193714 0.01193643 0.01183224]
+
+mean value: 0.010989046096801758
+
+key: test_mcc 
+value: [0.33333333 0.70710678 0.         0.         0.                nan
+        nan 0.70710678 1.         0.61237244]
+
+mean value: nan
+
+key: train_mcc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_accuracy 
+value: [0.66666667 0.83333333 0.5        0.5        0.5               nan
+        nan 0.83333333 1.         0.8       ]
+
+mean value: nan
+
+key: train_accuracy 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_fscore 
+value: [0.66666667 0.85714286 0.66666667 0.57142857 0.4               nan
+        nan 0.85714286 1.         0.85714286]
+
+mean value: nan
+
+key: train_fscore 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_precision 
+value: [0.66666667 0.75       0.5        0.5        0.5               nan
+        nan 0.75       1.         0.75      ]
+
+mean value: nan
+
+key: train_precision 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_recall 
+value: [0.66666667 1.         1.         0.66666667 0.33333333        nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_recall 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_roc_auc 
+value: [0.66666667 0.83333333 0.5        0.5        0.5               nan
+        nan 0.83333333 1.         0.75      ]
+
+mean value: nan
+
+key: train_roc_auc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_jcc 
+value: [0.5  0.75 0.5  0.4  0.25  nan  nan 0.75 1.   0.75]
+
+mean value: nan
+
+key: train_jcc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+MCC on Blind test: -0.03
+
+Accuracy on Blind test: 0.5
+
+Model_name: Decision Tree 
+Model func: DecisionTreeClassifier(random_state=42) 
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
+              colsample_bynode=None, colsample_bytree=None,
+              enable_categorical=False, gamma=None, gpu_id=None,
+              importance_type=None, interaction_constraints=None,
+              learning_rate=None, max_delta_step=None, max_depth=None,
+              min_child_weight=None, missing=nan, monotone_constraints=None,
+              n_estimators=100, n_jobs=None, num_parallel_tree=None,
+              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
+              scale_pos_weight=None, subsample=None, tree_method=None,
+              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
+Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model', DecisionTreeClassifier(random_state=42))])
+
+key: fit_time 
+value: [0.01573658 0.01213527 0.00921369 0.00900602 0.00878453 0.00891757
+ 0.0091784  0.00918722 0.00907421 0.00912213]
+
+mean value: 0.010035562515258788
+
+key: score_time 
+value: [0.01291966 0.00884986 0.00869918 0.00836587 0.00835061 0.0041151
+ 0.00418639 0.00848269 0.00831079 0.00835586]
+
+mean value: 0.008063602447509765
+
+key: test_mcc 
+value: [0.70710678 0.70710678 0.33333333 1.         0.70710678        nan
+        nan 0.70710678 1.         1.        ]
+
+mean value: nan
+
+key: train_mcc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_accuracy 
+value: [0.83333333 0.83333333 0.66666667 1.         0.83333333        nan
+        nan 0.83333333 1.         1.        ]
+
+mean value: nan
+
+key: train_accuracy 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_fscore 
+value: [0.8        0.85714286 0.66666667 1.         0.8               nan
+        nan 0.8        1.         1.        ]
+
+mean value: nan
+
+key: train_fscore 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_precision 
+value: [1.         0.75       0.66666667 1.         1.                nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_precision 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_recall 
+value: [0.66666667 1.         0.66666667 1.         0.66666667        nan
+        nan 0.66666667 1.         1.        ]
+
+mean value: nan
+
+key: train_recall 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_roc_auc 
+value: [0.83333333 0.83333333 0.66666667 1.         0.83333333        nan
+        nan 0.83333333 1.         1.        ]
+
+mean value: nan
+
+key: train_roc_auc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_jcc 
+value: [0.66666667 0.75       0.5        1.         0.66666667        nan
+        nan 0.66666667 1.         1.        ]
+
+mean value: nan
+
+key: train_jcc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+MCC on Blind test: 0.75
+
+Accuracy on Blind test: 0.88
+
+Model_name: Extra Trees 
+Model func: ExtraTreesClassifier(random_state=42) 
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
+              colsample_bynode=None, colsample_bytree=None,
+              enable_categorical=False, gamma=None, gpu_id=None,
+              importance_type=None, interaction_constraints=None,
+              learning_rate=None, max_delta_step=None, max_depth=None,
+              min_child_weight=None, missing=nan, monotone_constraints=None,
+              n_estimators=100, n_jobs=None, num_parallel_tree=None,
+              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
+              scale_pos_weight=None, subsample=None, tree_method=None,
+              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model', ExtraTreesClassifier(random_state=42))])
+
+key: fit_time 
+value: [0.07804847 0.07725072 0.07686806 0.07656288 0.07687521 0.07669759
+ 0.07711124 0.0770638  0.07734013 0.07700109]
+
+mean value: 0.07708191871643066
+
+key: score_time 
+value: [0.01668477 0.01657367 0.01659799 0.01660562 0.01659155 0.00435138
+ 0.0042994  0.01672506 0.01664042 0.01662922]
+
+mean value: 0.014169907569885254
+
+key: test_mcc 
+value: [0.         0.         0.4472136  0.         0.                nan
+        nan 1.         0.61237244 0.61237244]
+
+mean value: nan
+
+key: train_mcc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_accuracy 
+value: [0.5        0.5        0.66666667 0.5        0.5               nan
+        nan 1.         0.8        0.8       ]
+
+mean value: nan
+
+key: train_accuracy 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_fscore 
+value: [0.4        0.57142857 0.75       0.4        0.4               nan
+        nan 1.         0.66666667 0.85714286]
+
+mean value: nan
+
+key: train_fscore 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_precision 
+value: [0.5  0.5  0.6  0.5  0.5   nan  nan 1.   1.   0.75]
+
+mean value: nan
+
+key: train_precision 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_recall 
+value: [0.33333333 0.66666667 1.         0.33333333 0.33333333        nan
+        nan 1.         0.5        1.        ]
+
+mean value: nan
+
+key: train_recall 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_roc_auc 
+value: [0.5        0.5        0.66666667 0.5        0.5               nan
+        nan 1.         0.75       0.75      ]
+
+mean value: nan
+
+key: train_roc_auc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_jcc 
+value: [0.25 0.4  0.6  0.25 0.25  nan  nan 1.   0.5  0.75]
+
+mean value: nan
+
+key: train_jcc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+MCC on Blind test: 0.26
+
+Accuracy on Blind test: 0.65
+
+Model_name: Extra Tree 
+Model func: ExtraTreeClassifier(random_state=42) /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
+
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
+              colsample_bynode=None, colsample_bytree=None,
+              enable_categorical=False, gamma=None, gpu_id=None,
+              importance_type=None, interaction_constraints=None,
+              learning_rate=None, max_delta_step=None, max_depth=None,
+              min_child_weight=None, missing=nan, monotone_constraints=None,
+              n_estimators=100, n_jobs=None, num_parallel_tree=None,
+              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
+              scale_pos_weight=None, subsample=None, tree_method=None,
+              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model', ExtraTreeClassifier(random_state=42))])
+
+key: fit_time 
+value: [0.00831699 0.00808382 0.0081141  0.00813127 0.00810838 0.00812197
+ 0.00812531 0.00818348 0.00840473 0.00810528]
+
+mean value: 0.00816953182220459
+
+key: score_time 
+value: [0.00827551 0.00835371 0.00831056 0.00835538 0.00824785 0.00409269
+ 0.00411844 0.00829554 0.00831127 0.00829291]
+
+mean value: 0.007465386390686035
+
+key: test_mcc 
+value: [ 0.          0.         -0.33333333 -0.33333333 -0.33333333         nan
+         nan -0.33333333  0.16666667  0.66666667]
+
+mean value: nan
+
+key: train_mcc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_accuracy 
+value: [0.5        0.5        0.33333333 0.33333333 0.33333333        nan
+        nan 0.33333333 0.6        0.8       ]
+
+mean value: nan
+
+key: train_accuracy 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_fscore 
+value: [0.57142857 0.4        0.33333333 0.33333333 0.33333333        nan
+        nan 0.33333333 0.5        0.8       ]
+
+mean value: nan
+
+key: train_fscore 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_precision 
+value: [0.5        0.5        0.33333333 0.33333333 0.33333333        nan
+        nan 0.33333333 0.5        1.        ]
+
+mean value: nan
+
+key: train_precision 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_recall 
+value: [0.66666667 0.33333333 0.33333333 0.33333333 0.33333333        nan
+        nan 0.33333333 0.5        0.66666667]
+
+mean value: nan
+
+key: train_recall 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_roc_auc 
+value: [0.5        0.5        0.33333333 0.33333333 0.33333333        nan
+        nan 0.33333333 0.58333333 0.83333333]
+
+mean value: nan
+
+key: train_roc_auc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_jcc 
+value: [0.4        0.25       0.2        0.2        0.2               nan
+        nan 0.2        0.33333333 0.66666667]
+
+mean value: nan
+
+key: train_jcc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+MCC on Blind test: 0.26
+
+Accuracy on Blind test: 0.65
+
+Model_name: Random Forest 
+Model func: RandomForestClassifier(n_estimators=1000, random_state=42) 
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
+              colsample_bynode=None, colsample_bytree=None,
+              enable_categorical=False, gamma=None, gpu_id=None,
+              importance_type=None, interaction_constraints=None,
+              learning_rate=None, max_delta_step=None, max_depth=None,
+              min_child_weight=None, missing=nan, monotone_constraints=None,
+              n_estimators=100, n_jobs=None, num_parallel_tree=None,
+              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
+              scale_pos_weight=None, subsample=None, tree_method=None,
+              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
+  warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
+  warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
+  warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
+  warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
+  warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
+  warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
+  warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
+  warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
+  warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
+  warn(
+Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model',
+                 RandomForestClassifier(n_estimators=1000, random_state=42))])
+
+key: fit_time 
+value: [1.0098381  0.95413804 0.96767163 0.95117521 0.95051098 0.95307279
+ 0.95376849 0.95404077 0.95342731 0.95893645]
+
+mean value: 0.9606579780578614
+
+key: score_time 
+value: [0.08670974 0.08692455 0.08682108 0.08670044 0.08678889 0.00440526
+ 0.00438118 0.0927279  0.08725643 0.08669782]
+
+mean value: 0.07094132900238037
+
+key: test_mcc 
+value: [ 0.          0.4472136   0.4472136  -0.33333333  0.                 nan
+         nan  1.          0.61237244  1.        ]
+
+mean value: nan
+
+key: train_mcc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_accuracy 
+value: [0.5        0.66666667 0.66666667 0.33333333 0.5               nan
+        nan 1.         0.8        1.        ]
+
+mean value: nan
+
+key: train_accuracy 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_fscore 
+value: [0.4        0.75       0.75       0.33333333 0.4               nan
+        nan 1.         0.66666667 1.        ]
+
+mean value: nan
+
+key: train_fscore 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_precision 
+value: [0.5        0.6        0.6        0.33333333 0.5               nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_precision 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_recall 
+value: [0.33333333 1.         1.         0.33333333 0.33333333        nan
+        nan 1.         0.5        1.        ]
+
+mean value: nan
+
+key: train_recall 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_roc_auc 
+value: [0.5        0.66666667 0.66666667 0.33333333 0.5               nan
+        nan 1.         0.75       1.        ]
+
+mean value: nan
+
+key: train_roc_auc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_jcc 
+value: [0.25 0.6  0.6  0.2  0.25  nan  nan 1.   0.5  1.  ]
+
+mean value: nan
+
+key: train_jcc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+MCC on Blind test: 0.43
+
+Accuracy on Blind test: 0.75
+
+Model_name: Random Forest2 
+Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42) 
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
+              colsample_bynode=None, colsample_bytree=None,
+              enable_categorical=False, gamma=None, gpu_id=None,
+              importance_type=None, interaction_constraints=None,
+              learning_rate=None, max_delta_step=None, max_depth=None,
+              min_child_weight=None, missing=nan, monotone_constraints=None,
+              n_estimators=100, n_jobs=None, num_parallel_tree=None,
+              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
+              scale_pos_weight=None, subsample=None, tree_method=None,
+              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model',
+                 RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                                        n_estimators=1000, n_jobs=10,
+                                        oob_score=True, random_state=42))])
+
+key: fit_time 
+value: [0.77283669 0.89461184 0.82807112 0.81345439 0.84339142 0.90077519
+ 0.80800915 0.86690879 0.86262417 0.93805766]
+
+mean value: 0.8528740406036377
+
+key: score_time 
+value: [0.18392515 0.20116615 0.21894264 0.19048834 0.13761568 0.00455546
+ 0.0051651  0.17677927 0.19888759 0.21970034]
+
+mean value: 0.15372257232666015
+
+key: test_mcc 
+value: [ 0.          0.4472136   0.70710678 -0.33333333  0.                 nan
+         nan  1.          0.61237244  1.        ]
+
+mean value: nan
+
+key: train_mcc 
+value: [1.         0.96225045 1.         0.96225045 1.         0.96225045
+ 0.9258201  1.         1.         0.96296296]
+
+mean value: 0.9775534408683644
+
+key: test_accuracy 
+value: [0.5        0.66666667 0.83333333 0.33333333 0.5               nan
+        nan 1.         0.8        1.        ]
+
+mean value: nan
+
+key: train_accuracy 
+value: [1.         0.98076923 1.         0.98076923 1.         0.98076923
+ 0.96153846 1.         1.         0.98113208]
+
+mean value: 0.9884978229317852
+
+key: test_fscore 
+value: [0.4        0.75       0.85714286 0.33333333 0.4               nan
+        nan 1.         0.66666667 1.        ]
+
+mean value: nan
+
+key: train_fscore 
+value: [1.         0.98113208 1.         0.98039216 1.         0.98113208
+ 0.96296296 1.         1.         0.98113208]
+
+mean value: 0.9886751346240803
+
+key: test_precision 
+value: [0.5        0.6        0.75       0.33333333 0.5               nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_precision 
+value: [1.         0.96296296 1.         1.         1.         0.96296296
+ 0.92857143 1.         1.         0.96296296]
+
+mean value: 0.9817460317460317
+
+key: test_recall 
+value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
+  warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
+[0.33333333 1.         1.         0.33333333 0.33333333        nan
+        nan 1.         0.5        1.        ]
+
+mean value: nan
+
+key: train_recall 
+value: [1.         1.         1.         0.96153846 1.         1.
+ 1.         1.         1.         1.        ]
+
+mean value: 0.9961538461538462
+
+key: test_roc_auc 
+value: [0.5        0.66666667 0.83333333 0.33333333 0.5               nan
+        nan 1.         0.75       1.        ]
+
+mean value: nan
+
+key: train_roc_auc 
+value: [1.         0.98076923 1.         0.98076923 1.         0.98076923
+ 0.96153846 1.         1.         0.98148148]
+
+mean value: 0.9885327635327635
+
+key: test_jcc 
+value: [0.25 0.6  0.75 0.2  0.25  nan  nan 1.   0.5  1.  ]
+
+mean value: nan
+
+key: train_jcc 
+value: [1.         0.96296296 1.         0.96153846 1.         0.96296296
+ 0.92857143 1.         1.         0.96296296]
+
+mean value: 0.9778998778998779
+
+MCC on Blind test: 0.55
+
+Accuracy on Blind test: 0.8
+
+Model_name: Naive Bayes 
+Model func: BernoulliNB() 
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
+              colsample_bynode=None, colsample_bytree=None,
+              enable_categorical=False, gamma=None, gpu_id=None,
+              importance_type=None, interaction_constraints=None,
+              learning_rate=None, max_delta_step=None, max_depth=None,
+              min_child_weight=None, missing=nan, monotone_constraints=None,
+              n_estimators=100, n_jobs=None, num_parallel_tree=None,
+              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
+              scale_pos_weight=None, subsample=None, tree_method=None,
+              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model', BernoulliNB())])
+
+key: fit_time 
+value: [0.020298   0.00831819 0.00845861 0.0083611  0.00830889 0.00845575
+ 0.00973797 0.00877762 0.00922585 0.00842929]
+
+mean value: 0.009837126731872559
+
+key: score_time 
+value: [0.01680541 0.00865436 0.00859785 0.00842381 0.00902247 0.00420737
+ 0.00420499 0.00842237 0.00900936 0.00894046]
+
+mean value: 0.00862884521484375
+
+key: test_mcc 
+value: [ 0.4472136  -0.33333333  0.         -0.33333333  0.                 nan
+         nan -0.33333333  0.66666667  0.16666667]
+
+mean value: nan
+
+key: train_mcc 
+value: [0.77151675 0.63245553 0.84615385 0.82305489 0.80829038 0.77849894
+ 0.65433031 0.74466871 0.74106548 0.73609205]
+
+mean value: 0.7536126885007749
+
+key: test_accuracy 
+value: [0.66666667 0.33333333 0.5        0.33333333 0.5               nan
+        nan 0.33333333 0.8        0.6       ]
+
+mean value: nan
+
+key: train_accuracy 
+value: [0.88461538 0.80769231 0.92307692 0.90384615 0.90384615 0.88461538
+ 0.82692308 0.86538462 0.86792453 0.86792453]
+
+mean value: 0.8735849056603774
+
+key: test_fscore 
+value: [0.5        0.33333333 0.66666667 0.33333333 0.4               nan
+        nan 0.33333333 0.8        0.66666667]
+
+mean value: nan
+
+key: train_fscore 
+value: [0.88       0.7826087  0.92307692 0.89361702 0.90196078 0.875
+ 0.83018868 0.85106383 0.8627451  0.8627451 ]
+
+mean value: 0.8663006129430366
+
+key: test_precision 
+value: [1.         0.33333333 0.5        0.33333333 0.5               nan
+        nan 0.33333333 0.66666667 0.66666667]
+
+mean value: nan
+
+key: train_precision 
+value: [0.91666667 0.9        0.92307692 1.         0.92       0.95454545
+ 0.81481481 0.95238095 0.91666667 0.88      ]
+
+mean value: 0.9178151478151478
+
+key: test_recall 
+value: [0.33333333 0.33333333 1.         0.33333333 0.33333333        nan
+        nan 0.33333333 1.         0.66666667]
+
+mean value: nan
+
+key: train_recall 
+value: [0.84615385 0.69230769 0.92307692 0.80769231 0.88461538 0.80769231
+ 0.84615385 0.76923077 0.81481481 0.84615385]
+
+mean value: 0.8237891737891738
+
+key: test_roc_auc 
+value: [0.66666667 0.33333333 0.5        0.33333333 0.5               nan
+        nan 0.33333333 0.83333333 0.58333333]
+
+mean value: nan
+
+key: train_roc_auc 
+value: [0.88461538 0.80769231 0.92307692 0.90384615 0.90384615 0.88461538
+ 0.82692308 0.86538462 0.86894587 0.86752137]
+
+mean value: 0.8736467236467237
+
+key: test_jcc 
+value: [0.33333333 0.2        0.5        0.2        0.25              nan
+        nan 0.2        0.66666667 0.5       ]
+
+mean value: nan
+
+key: train_jcc 
+value: [0.78571429 0.64285714 0.85714286 0.80769231 0.82142857 0.77777778
+ 0.70967742 0.74074074 0.75862069 0.75862069]
+
+mean value: 0.7660272482018867
+
+MCC on Blind test: -0.03
+
+Accuracy on Blind test: 0.5
+
+Model_name: XGBoost 
+Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
+              colsample_bynode=None, colsample_bytree=None,
+              enable_categorical=False, gamma=None, gpu_id=None,
+              importance_type=None, interaction_constraints=None,
+              learning_rate=None, max_delta_step=None, max_depth=None,
+              min_child_weight=None, missing=nan, monotone_constraints=None,
+              n_estimators=100, n_jobs=None, num_parallel_tree=None,
+              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
+              scale_pos_weight=None, subsample=None, tree_method=None,
+              use_label_encoder=False, validate_parameters=None, verbosity=0) 
+List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
+[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
+              colsample_bynode=None, colsample_bytree=None,
+              enable_categorical=False, gamma=None, gpu_id=None,
+              importance_type=None, interaction_constraints=None,
+              learning_rate=None, max_delta_step=None, max_depth=None,
+              min_child_weight=None, missing=nan, monotone_constraints=None,
+              n_estimators=100, n_jobs=None, num_parallel_tree=None,
+              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
+              scale_pos_weight=None, subsample=None, tree_method=None,
+              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000...
+                               interaction_constraints=None, learning_rate=None,
+                               max_delta_step=None, max_depth=None,
+                               min_child_weight=None, missing=nan,
+                               monotone_constraints=None, n_estimators=100,
+                               n_jobs=None, num_parallel_tree=None,
+                               predictor=None, random_state=42, reg_alpha=None,
+                               reg_lambda=None, scale_pos_weight=None,
+                               subsample=None, tree_method=None,
+                               use_label_encoder=False,
+                               validate_parameters=None, verbosity=0))])
+
+key: fit_time 
+value: [0.2208004  0.03246665 0.02982497 0.03053617 0.03095222 0.05028415
+ 0.04468656 0.03867793 0.03164172 0.07298398]
+
+mean value: 0.05828547477722168
+
+key: score_time 
+value: [0.01097751 0.01042724 0.01009774 0.01005578 0.01014471 0.0048039
+ 0.00504088 0.01020074 0.00997043 0.0110755 ]
+
+mean value: 0.009279441833496094
+
+key: test_mcc 
+value: [1.         1.         0.33333333 0.70710678 0.70710678        nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_mcc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_accuracy 
+value: [1.         1.         0.66666667 0.83333333 0.83333333        nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_accuracy 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_fscore 
+value: [1.         1.         0.66666667 0.8        0.85714286        nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_fscore 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_precision 
+value: [1.         1.         0.66666667 1.         0.75              nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_precision 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_recall 
+value: [1.         1.         0.66666667 0.66666667 1.                nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_recall 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_roc_auc 
+value: [1.         1.         0.66666667 0.83333333 0.83333333        nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_roc_auc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_jcc 
+value: [1.         1.         0.5        0.66666667 0.75              nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_jcc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+MCC on Blind test: 0.89
+
+Accuracy on Blind test: 0.95
+
+Model_name: LDA 
+Model func: LinearDiscriminantAnalysis() 
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
+              gamma=0, gpu_id=-1, importance_type=None,
+              interaction_constraints='', learning_rate=0.300000012,
+              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
+              monotone_constraints='()', n_estimators=100, n_jobs=12,
+              num_parallel_tree=1, predictor='auto', random_state=42,
+              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
+              tree_method='exact', use_label_encoder=False,
+              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
+Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model', LinearDiscriminantAnalysis())])
+
+key: fit_time 
+value: [0.02003193 0.03504539 0.02579474 0.01418924 0.01421022 0.01418233
+ 0.0250864  0.03580475 0.03477359 0.03485084]
+
+mean value: 0.02539694309234619
+
+key: score_time 
+value: [0.02190375 0.0215292  0.01140451 0.01143599 0.01142645 0.00573635
+ 0.00591302 0.02012944 0.02000165 0.02008748]
+
+mean value: 0.014956784248352051
+
+key: test_mcc 
+value: [0.4472136  0.70710678 0.33333333 0.         0.                nan
+        nan 0.70710678 0.40824829 0.61237244]
+
+mean value: nan
+
+key: train_mcc 
+value: [1.         0.92307692 0.96225045 1.         1.         0.96225045
+ 1.         1.         1.         1.        ]
+
+mean value: 0.9847577820375676
+
+key: test_accuracy 
+value: [0.66666667 0.83333333 0.66666667 0.5        0.5               nan
+        nan 0.83333333 0.6        0.8       ]
+
+mean value: nan
+
+key: train_accuracy 
+value: [1.         0.96153846 0.98076923 1.         1.         0.98076923
+ 1.         1.         1.         1.        ]
+
+mean value: 0.9923076923076923
+
+key: test_fscore 
+value: [0.75       0.85714286 0.66666667 0.4        0.4               nan
+        nan 0.85714286 0.66666667 0.85714286]
+
+mean value: nan
+
+key: train_fscore 
+value: [1.         0.96153846 0.98113208 1.         1.         0.98113208
+ 1.         1.         1.         1.        ]
+
+mean value: 0.9923802612481858
+
+key: test_precision 
+value: [0.6        0.75       0.66666667 0.5        0.5               nan
+        nan 0.75       0.5        0.75      ]
+
+mean value: nan
+
+key: train_precision 
+value: [1.         0.96153846 0.96296296 1.         1.         0.96296296
+ 1.         1.         1.         1.        ]
+
+mean value: 0.9887464387464387
+
+key: test_recall 
+value: [1.         1.         0.66666667 0.33333333 0.33333333        nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_recall 
+value: [1.         0.96153846 1.         1.         1.         1.
+ 1.         1.         1.         1.        ]
+
+mean value: 0.9961538461538462
+
+key: test_roc_auc 
+value: [0.66666667 0.83333333 0.66666667 0.5        0.5               nan
+        nan 0.83333333 0.66666667 0.75      ]
+
+mean value: nan
+
+key: train_roc_auc 
+value: [1.         0.96153846 0.98076923 1.         1.         0.98076923
+ 1.         1.         1.         1.        ]
+
+mean value: 0.9923076923076923
+
+key: test_jcc 
+value: [0.6  0.75 0.5  0.25 0.25  nan  nan 0.75 0.5  0.75]
+
+mean value: nan
+
+key: train_jcc 
+value: [1.         0.92592593 0.96296296 1.         1.         0.96296296
+ 1.         1.         1.         1.        ]
+
+mean value: 0.9851851851851852
+
+MCC on Blind test: 0.15
+
+Accuracy on Blind test: 0.6
+
+Model_name: Multinomial 
+Model func: MultinomialNB() 
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
+              gamma=0, gpu_id=-1, importance_type=None,
+              interaction_constraints='', learning_rate=0.300000012,
+              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
+              monotone_constraints='()', n_estimators=100, n_jobs=12,
+              num_parallel_tree=1, predictor='auto', random_state=42,
+              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
+              tree_method='exact', use_label_encoder=False,
+              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model', MultinomialNB())])
+
+key: fit_time 
+value: [0.02043128 0.0085032  0.0083077  0.008286   0.0082469  0.00821781
+ 0.00827694 0.00811672 0.0081346  0.00810599]
+
+mean value: 0.009462714195251465
+
+key: score_time 
+value: [0.00872779 0.00860095 0.0085876  0.0083971  0.00843334 0.00414538
+ 0.00438237 0.00828457 0.00826526 0.00826359]
+
+mean value: 0.007608795166015625
+
+key: test_mcc 
+value: [-0.4472136  -0.4472136   0.         -0.70710678 -0.33333333         nan
+         nan  1.          0.66666667  0.66666667]
+
+mean value: nan
+
+key: train_mcc 
+value: [0.6172134  0.58080232 0.73131034 0.57735027 0.6172134  0.65433031
+ 0.54006172 0.69230769 0.58487934 0.50927299]
+
+mean value: 0.6104741777466153
+
+key: test_accuracy 
+value: [0.33333333 0.33333333 0.5        0.16666667 0.33333333        nan
+        nan 1.         0.8        0.8       ]
+
+mean value: nan
+
+key: train_accuracy 
+value: [0.80769231 0.78846154 0.86538462 0.78846154 0.80769231 0.82692308
+ 0.76923077 0.84615385 0.79245283 0.75471698]
+
+mean value: 0.8047169811320755
+
+key: test_fscore 
+value: [0.         0.5        0.66666667 0.28571429 0.33333333        nan
+        nan 1.         0.8        0.8       ]
+
+mean value: nan
+
+key: train_fscore 
+value: [0.8        0.7755102  0.86792453 0.78431373 0.8        0.82352941
+ 0.77777778 0.84615385 0.8        0.74509804]
+
+mean value: 0.8020307532785732
+
+key: test_precision 
+value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
+[0.         0.4        0.5        0.25       0.33333333        nan
+        nan 1.         0.66666667 1.        ]
+
+mean value: nan
+
+key: train_precision 
+value: [0.83333333 0.82608696 0.85185185 0.8        0.83333333 0.84
+ 0.75       0.84615385 0.78571429 0.76      ]
+
+mean value: 0.812647360690839
+
+key: test_recall 
+value: [0.         0.66666667 1.         0.33333333 0.33333333        nan
+        nan 1.         1.         0.66666667]
+
+mean value: nan
+
+key: train_recall 
+value: [0.76923077 0.73076923 0.88461538 0.76923077 0.76923077 0.80769231
+ 0.80769231 0.84615385 0.81481481 0.73076923]
+
+mean value: 0.793019943019943
+
+key: test_roc_auc 
+value: [0.33333333 0.33333333 0.5        0.16666667 0.33333333        nan
+        nan 1.         0.83333333 0.83333333]
+
+mean value: nan
+
+key: train_roc_auc 
+value: [0.80769231 0.78846154 0.86538462 0.78846154 0.80769231 0.82692308
+ 0.76923077 0.84615385 0.79202279 0.7542735 ]
+
+mean value: 0.8046296296296296
+
+key: test_jcc 
+value: [0.         0.33333333 0.5        0.16666667 0.2               nan
+        nan 1.         0.66666667 0.66666667]
+
+mean value: nan
+
+key: train_jcc 
+value: [0.66666667 0.63333333 0.76666667 0.64516129 0.66666667 0.7
+ 0.63636364 0.73333333 0.66666667 0.59375   ]
+
+mean value: 0.6708608260019551
+
+MCC on Blind test: 0.41
+
+Accuracy on Blind test: 0.72
+
+Model_name: Passive Aggresive 
+Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42) 
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
+              gamma=0, gpu_id=-1, importance_type=None,
+              interaction_constraints='', learning_rate=0.300000012,
+              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
+              monotone_constraints='()', n_estimators=100, n_jobs=12,
+              num_parallel_tree=1, predictor='auto', random_state=42,
+              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
+              tree_method='exact', use_label_encoder=False,
+              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model',
+                 PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
+
+key: fit_time 
+value: [0.00999832 0.01279449 0.01210523 0.01255631 0.01224685 0.01186943
+ 0.0117228  0.01643276 0.01289439 0.01178789]
+
+mean value: 0.012440848350524902
+
+key: score_time 
+value: [0.0086329  0.01154256 0.01120043 0.01119494 0.01112103 0.00591636
+ 0.00598788 0.01161695 0.01126814 0.01119208]
+
+mean value: 0.009967327117919922
+
+key: test_mcc 
+value: [ 0.33333333  0.33333333  0.         -0.33333333  0.                 nan
+         nan  1.          1.          0.61237244]
+
+mean value: nan
+
+key: train_mcc 
+value: [1.         0.82305489 0.9258201  0.96225045 1.         0.9258201
+ 0.72760688 0.9258201  0.96291111 0.85922733]
+
+mean value: 0.9112510953164517
+
+key: test_accuracy 
+value: [0.66666667 0.66666667 0.5        0.33333333 0.5               nan
+        nan 1.         1.         0.8       ]
+
+mean value: nan
+
+key: train_accuracy 
+value: [1.         0.90384615 0.96153846 0.98076923 1.         0.96153846
+ 0.84615385 0.96153846 0.98113208 0.9245283 ]
+
+mean value: 0.9521044992743106
+
+key: test_fscore 
+value: [0.66666667 0.66666667 0.66666667 0.33333333 0.4               nan
+        nan 1.         1.         0.85714286]
+
+mean value: nan
+
+key: train_fscore 
+value: [1.         0.89361702 0.96296296 0.98039216 1.         0.96
+ 0.86666667 0.96296296 0.98181818 0.92857143]
+
+mean value: 0.9536991381121543
+
+key: test_precision 
+value: [0.66666667 0.66666667 0.5        0.33333333 0.5               nan
+        nan 1.         1.         0.75      ]
+
+mean value: nan
+
+key: train_precision 
+value: [1.         1.         0.92857143 1.         1.         1.
+ 0.76470588 0.92857143 0.96428571 0.86666667]
+
+mean value: 0.945280112044818
+
+key: test_recall 
+value: [0.66666667 0.66666667 1.         0.33333333 0.33333333        nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_recall 
+value: [1.         0.80769231 1.         0.96153846 1.         0.92307692
+ 1.         1.         1.         1.        ]
+
+mean value: 0.9692307692307692
+
+key: test_roc_auc 
+value: [0.66666667 0.66666667 0.5        0.33333333 0.5               nan
+        nan 1.         1.         0.75      ]
+
+mean value: nan
+
+key: train_roc_auc 
+value: [1.         0.90384615 0.96153846 0.98076923 1.         0.96153846
+ 0.84615385 0.96153846 0.98076923 0.92592593]
+
+mean value: 0.9522079772079772
+
+key: test_jcc 
+value: [0.5  0.5  0.5  0.2  0.25  nan  nan 1.   1.   0.75]
+
+mean value: nan
+
+key: train_jcc 
+value: [1.         0.80769231 0.92857143 0.96153846 1.         0.92307692
+ 0.76470588 0.92857143 0.96428571 0.86666667]
+
+mean value: 0.9145108812755872
+
+MCC on Blind test: 0.01
+
+Accuracy on Blind test: 0.52
+
+Model_name: Stochastic GDescent 
+Model func: SGDClassifier(n_jobs=10, random_state=42) 
+List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
+  _warn_prf(average, modifier, msg_start, len(result))
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
+[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
+              gamma=0, gpu_id=-1, importance_type=None,
+              interaction_constraints='', learning_rate=0.300000012,
+              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
+              monotone_constraints='()', n_estimators=100, n_jobs=12,
+              num_parallel_tree=1, predictor='auto', random_state=42,
+              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
+              tree_method='exact', use_label_encoder=False,
+              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model', SGDClassifier(n_jobs=10, random_state=42))])
+
+key: fit_time 
+value: [0.01173449 0.0120728  0.01188064 0.01165843 0.01149154 0.01192474
+ 0.01300406 0.01200342 0.01213932 0.01185942]
+
+mean value: 0.011976885795593261
+
+key: score_time 
+value: [0.0111804  0.01164484 0.01130462 0.0112021  0.01118112 0.0060792
+ 0.00607777 0.01164412 0.01155257 0.01169229]
+
+mean value: 0.010355901718139649
+
+key: test_mcc 
+value: [ 0.          0.4472136   0.         -0.33333333  0.                 nan
+         nan  1.          1.          0.61237244]
+
+mean value: nan
+
+key: train_mcc 
+value: [0.82305489 0.71151247 1.         0.69693205 1.         0.82305489
+ 1.         0.85634884 1.         0.92724773]
+
+mean value: 0.8838150881832578
+
+key: test_accuracy 
+value: [0.5        0.66666667 0.5        0.33333333 0.5               nan
+        nan 1.         1.         0.8       ]
+
+mean value: nan
+
+key: train_accuracy 
+value: [0.90384615 0.84615385 1.         0.82692308 1.         0.90384615
+ 1.         0.92307692 1.         0.96226415]
+
+mean value: 0.936611030478955
+
+key: test_fscore 
+value: [0.         0.5        0.66666667 0.33333333 0.4               nan
+        nan 1.         1.         0.85714286]
+
+mean value: nan
+
+key: train_fscore 
+value: [0.89361702 0.82608696 1.         0.79069767 1.         0.89361702
+ 1.         0.92857143 1.         0.96296296]
+
+mean value: 0.9295553065027927
+
+key: test_precision 
+value: [0.         1.         0.5        0.33333333 0.5               nan
+        nan 1.         1.         0.75      ]
+
+mean value: nan
+
+key: train_precision 
+value: [1.         0.95       1.         1.         1.         1.
+ 1.         0.86666667 1.         0.92857143]
+
+mean value: 0.9745238095238096
+
+key: test_recall 
+value: [0.         0.33333333 1.         0.33333333 0.33333333        nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_recall 
+value: [0.80769231 0.73076923 1.         0.65384615 1.         0.80769231
+ 1.         1.         1.         1.        ]
+
+mean value: 0.9
+
+key: test_roc_auc 
+value: [0.5        0.66666667 0.5        0.33333333 0.5               nan
+        nan 1.         1.         0.75      ]
+
+mean value: nan
+
+key: train_roc_auc 
+value: [0.90384615 0.84615385 1.         0.82692308 1.         0.90384615
+ 1.         0.92307692 1.         0.96296296]
+
+mean value: 0.9366809116809117
+
+key: test_jcc 
+value: [0.         0.33333333 0.5        0.2        0.25              nan
+        nan 1.         1.         0.75      ]
+
+mean value: nan
+
+key: train_jcc 
+value: [0.80769231 0.7037037  1.         0.65384615 1.         0.80769231
+ 1.         0.86666667 1.         0.92857143]
+
+mean value: 0.8768172568172569
+
+MCC on Blind test: 0.18
+
+Accuracy on Blind test: 0.6
+
+Model_name: AdaBoost Classifier 
+Model func: AdaBoostClassifier(random_state=42) 
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
+              gamma=0, gpu_id=-1, importance_type=None,
+              interaction_constraints='', learning_rate=0.300000012,
+              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
+              monotone_constraints='()', n_estimators=100, n_jobs=12,
+              num_parallel_tree=1, predictor='auto', random_state=42,
+              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
+              tree_method='exact', use_label_encoder=False,
+              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
+Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model', AdaBoostClassifier(random_state=42))])
+
+key: fit_time 
+value: [0.08145452 0.07150626 0.07048178 0.07051468 0.07052875 0.07111621
+ 0.07133055 0.07053328 0.07560372 0.0774231 ]
+
+mean value: 0.07304928302764893
+
+key: score_time 
+value: [0.01477504 0.01447749 0.01440597 0.01421499 0.01418257 0.00448847
+ 0.00439095 0.01417637 0.0155952  0.01548576]
+
+mean value: 0.012619280815124511
+
+key: test_mcc 
+value: [0.4472136  1.         0.33333333 0.70710678 0.33333333        nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_mcc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_accuracy 
+value: [0.66666667 1.         0.66666667 0.83333333 0.66666667        nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_accuracy 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_fscore 
+value: [0.5        1.         0.66666667 0.8        0.66666667        nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_fscore 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_precision 
+value: [1.         1.         0.66666667 1.         0.66666667        nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_precision 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_recall 
+value: [0.33333333 1.         0.66666667 0.66666667 0.66666667        nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_recall 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_roc_auc 
+value: [0.66666667 1.         0.66666667 0.83333333 0.66666667        nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_roc_auc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_jcc 
+value: [0.33333333 1.         0.5        0.66666667 0.5               nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_jcc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+MCC on Blind test: 0.95
+
+Accuracy on Blind test: 0.98
+
+Model_name: Bagging Classifier 
+Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42) 
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
+              gamma=0, gpu_id=-1, importance_type=None,
+              interaction_constraints='', learning_rate=0.300000012,
+              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
+              monotone_constraints='()', n_estimators=100, n_jobs=12,
+              num_parallel_tree=1, predictor='auto', random_state=42,
+              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
+              tree_method='exact', use_label_encoder=False,
+              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model',
+                 BaggingClassifier(n_jobs=10, oob_score=True,
+                                   random_state=42))])
+
+key: fit_time 
+value: [0.03153086 0.02778888 0.03016424 0.03284216 0.04811215 0.02667236
+ 0.03099704 0.03483891 0.03757524 0.02810836]
+
+mean value: 0.03286302089691162
+
+key: score_time 
+value: [0.02119184 0.0220542  0.02267241 0.03023434 0.02344704 0.00497055
+ 0.01071739 0.03668547 0.02092147 0.01624227]
+
+mean value: 0.0209136962890625
+
+key: test_mcc 
+value: [0.70710678 0.70710678 0.33333333 0.70710678 0.33333333        nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_mcc 
+value: [0.96225045 1.         1.         1.         1.         1.
+ 0.96225045 1.         0.96296296 1.        ]
+
+mean value: 0.9887463860261716
+
+key: test_accuracy 
+value: [0.83333333 0.83333333 0.66666667 0.83333333 0.66666667        nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_accuracy 
+value: [0.98076923 1.         1.         1.         1.         1.
+ 0.98076923 1.         0.98113208 1.        ]
+
+mean value: 0.994267053701016
+
+key: test_fscore 
+value: [0.8        0.85714286 0.66666667 0.8        0.66666667        nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_fscore 
+value: [0.98039216 1.         1.         1.         1.         1.
+ 0.98039216 1.         0.98113208 1.        ]
+
+mean value: 0.9941916389197188
+
+key: test_precision 
+value: [1.         0.75       0.66666667 1.         0.66666667        nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_precision 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_recall 
+value: [0.66666667 1.         0.66666667 0.66666667 0.66666667        nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_recall 
+value: [0.96153846 1.         1.         1.         1.         1.
+ 0.96153846 1.         0.96296296 1.        ]
+
+mean value: 0.9886039886039886
+
+key: test_roc_auc 
+value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
+[0.83333333 0.83333333 0.66666667 0.83333333 0.66666667        nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_roc_auc 
+value: [0.98076923 1.         1.         1.         1.         1.
+ 0.98076923 1.         0.98148148 1.        ]
+
+mean value: 0.9943019943019943
+
+key: test_jcc 
+value: [0.66666667 0.75       0.5        0.66666667 0.5               nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_jcc 
+value: [0.96153846 1.         1.         1.         1.         1.
+ 0.96153846 1.         0.96296296 1.        ]
+
+mean value: 0.9886039886039886
+
+MCC on Blind test: 0.89
+
+Accuracy on Blind test: 0.95
+
+Model_name: Gaussian Process 
+Model func: GaussianProcessClassifier(random_state=42) 
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
+              gamma=0, gpu_id=-1, importance_type=None,
+              interaction_constraints='', learning_rate=0.300000012,
+              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
+              monotone_constraints='()', n_estimators=100, n_jobs=12,
+              num_parallel_tree=1, predictor='auto', random_state=42,
+              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
+              tree_method='exact', use_label_encoder=False,
+              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model', GaussianProcessClassifier(random_state=42))])
+
+key: fit_time 
+value: [0.01145601 0.01071548 0.01049471 0.01030135 0.01034331 0.01053452
+ 0.01219153 0.0107038  0.01105452 0.01032877]
+
+mean value: 0.01081240177154541
+
+key: score_time 
+value: [0.00883389 0.00875664 0.00957775 0.00913954 0.00889802 0.00447464
+ 0.0049305  0.00890326 0.0089941  0.00859141]
+
+mean value: 0.008109974861145019
+
+key: test_mcc 
+value: [-0.33333333  0.          0.          0.          0.                 nan
+         nan -0.33333333  0.61237244  0.16666667]
+
+mean value: nan
+
+key: train_mcc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_accuracy 
+value: [0.33333333 0.5        0.5        0.5        0.5               nan
+        nan 0.33333333 0.8        0.6       ]
+
+mean value: nan
+
+key: train_accuracy 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_fscore 
+value: [0.33333333 0.57142857 0.66666667 0.4        0.4               nan
+        nan 0.33333333 0.66666667 0.66666667]
+
+mean value: nan
+
+key: train_fscore 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_precision 
+value: [0.33333333 0.5        0.5        0.5        0.5               nan
+        nan 0.33333333 1.         0.66666667]
+
+mean value: nan
+
+key: train_precision 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_recall 
+value: [0.33333333 0.66666667 1.         0.33333333 0.33333333        nan
+        nan 0.33333333 0.5        0.66666667]
+
+mean value: nan
+
+key: train_recall 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_roc_auc 
+value: [0.33333333 0.5        0.5        0.5        0.5               nan
+        nan 0.33333333 0.75       0.58333333]
+
+mean value: nan
+
+key: train_roc_auc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_jcc 
+value: [0.2  0.4  0.5  0.25 0.25  nan  nan 0.2  0.5  0.5 ]
+
+mean value: nan
+
+key: train_jcc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+MCC on Blind test: 0.1
+
+Accuracy on Blind test: 0.55
+
+Model_name: Gradient Boosting 
+Model func: GradientBoostingClassifier(random_state=42) 
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
+              gamma=0, gpu_id=-1, importance_type=None,
+              interaction_constraints='', learning_rate=0.300000012,
+              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
+              monotone_constraints='()', n_estimators=100, n_jobs=12,
+              num_parallel_tree=1, predictor='auto', random_state=42,
+              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
+              tree_method='exact', use_label_encoder=False,
+              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
+  warnings.warn("Variables are collinear")
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
+  warnings.warn("Variables are collinear")
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
+  warnings.warn("Variables are collinear")
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
+  warnings.warn("Variables are collinear")
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
+  warnings.warn("Variables are collinear")
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
+  warnings.warn("Variables are collinear")
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
+  warnings.warn("Variables are collinear")
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
+  warnings.warn("Variables are collinear")
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
+  _warn_prf(average, modifier, msg_start, len(result))
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
+  warnings.warn("Variables are collinear")
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
+  warnings.warn("Variables are collinear")
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
+  warnings.warn("Variables are collinear")
+Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model', GradientBoostingClassifier(random_state=42))])
+
+key: fit_time 
+value: [0.11957264 0.14153457 0.10522366 0.10514927 0.10289383 0.12513161
+ 0.12388921 0.12803459 0.12305784 0.10899067]
+
+mean value: 0.11834778785705566
+
+key: score_time 
+value: [0.00914145 0.00893044 0.00910687 0.00897527 0.00912714 0.00470376
+ 0.004632   0.00898361 0.00916672 0.00901628]
+
+mean value: 0.008178353309631348
+
+key: test_mcc 
+value: [1.         1.         0.33333333 0.70710678 0.70710678        nan
+        nan 0.70710678 1.         0.61237244]
+
+mean value: nan
+
+key: train_mcc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_accuracy 
+value: [1.         1.         0.66666667 0.83333333 0.83333333        nan
+        nan 0.83333333 1.         0.8       ]
+
+mean value: nan
+
+key: train_accuracy 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_fscore 
+value: [1.         1.         0.66666667 0.85714286 0.8               nan
+        nan 0.8        1.         0.85714286]
+
+mean value: nan
+
+key: train_fscore 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_precision 
+value: [1.         1.         0.66666667 0.75       1.                nan
+        nan 1.         1.         0.75      ]
+
+mean value: nan
+
+key: train_precision 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_recall 
+value: [1.         1.         0.66666667 1.         0.66666667        nan
+        nan 0.66666667 1.         1.        ]
+
+mean value: nan
+
+key: train_recall 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_roc_auc 
+value: [1.         1.         0.66666667 0.83333333 0.83333333        nan
+        nan 0.83333333 1.         0.75      ]
+
+mean value: nan
+
+key: train_roc_auc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_jcc 
+value: [1.         1.         0.5        0.75       0.66666667        nan
+        nan 0.66666667 1.         0.75      ]
+
+mean value: nan
+
+key: train_jcc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+MCC on Blind test: 0.63
+
+Accuracy on Blind test: 0.8
+
+Model_name: QDA 
+Model func: QuadraticDiscriminantAnalysis() 
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
+              gamma=0, gpu_id=-1, importance_type=None,
+              interaction_constraints='', learning_rate=0.300000012,
+              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
+              monotone_constraints='()', n_estimators=100, n_jobs=12,
+              num_parallel_tree=1, predictor='auto', random_state=42,
+              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
+              tree_method='exact', use_label_encoder=False,
+              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model', QuadraticDiscriminantAnalysis())])
+
+key: fit_time 
+value: [0.00893831 0.00888896 0.00908279 0.00880885 0.00884104 0.00885582
+ 0.00883555 0.00878549 0.00888848 0.01262331]
+
+mean value: 0.009254860877990722
+
+key: score_time 
+value: [0.00863051 0.00949979 0.00870037 0.00860357 0.00859833 0.00480151
+ 0.00421071 0.00872087 0.00892401 0.01160169]
+
+mean value: 0.00822913646697998
+
+key: test_mcc 
+value: [ 0.33333333  0.         -0.33333333  0.70710678  0.                 nan
+         nan  0.          0.40824829 -0.61237244]
+
+mean value: nan
+
+key: train_mcc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_accuracy 
+value: [0.66666667 0.5        0.33333333 0.83333333 0.5               nan
+        nan 0.5        0.6        0.2       ]
+
+mean value: nan
+
+key: train_accuracy 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_fscore 
+value: [0.66666667 0.57142857 0.33333333 0.85714286 0.4               nan
+        nan 0.         0.66666667 0.        ]
+
+mean value: nan
+
+key: train_fscore 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_precision 
+value: [0.66666667 0.5        0.33333333 0.75       0.5               nan
+        nan 0.         0.5        0.        ]
+
+mean value: nan
+
+key: train_precision 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_recall 
+value: [0.66666667 0.66666667 0.33333333 1.         0.33333333        nan
+        nan 0.         1.         0.        ]
+
+mean value: nan
+
+key: train_recall 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_roc_auc 
+value: [0.66666667 0.5        0.33333333 0.83333333 0.5               nan
+        nan 0.5        0.66666667 0.25      ]
+
+mean value: nan
+
+key: train_roc_auc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_jcc 
+value: [0.5  0.4  0.2  0.75 0.25  nan  nan 0.   0.5  0.  ]
+
+mean value: nan
+
+key: train_jcc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+MCC on Blind test: -0.14
+
+Accuracy on Blind test: 0.48
+
+Model_name: Ridge Classifier 
+Model func: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
+RidgeClassifier(random_state=42) 
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
+              gamma=0, gpu_id=-1, importance_type=None,
+              interaction_constraints='', learning_rate=0.300000012,
+              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
+              monotone_constraints='()', n_estimators=100, n_jobs=12,
+              num_parallel_tree=1, predictor='auto', random_state=42,
+              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
+              tree_method='exact', use_label_encoder=False,
+              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model', RidgeClassifier(random_state=42))])
+
+key: fit_time 
+value: [0.01256394 0.01242661 0.01233029 0.01234102 0.01236892 0.01238871
+ 0.01237059 0.01262426 0.01242614 0.0123601 ]
+
+mean value: 0.012420058250427246
+
+key: score_time 
+value: [0.01140499 0.01143169 0.01140809 0.01140833 0.01138973 0.00616193
+ 0.0060811  0.01142287 0.01136637 0.01145077]
+
+mean value: 0.010352587699890137
+
+key: test_mcc 
+value: [0.33333333 0.70710678 0.70710678 0.         0.                nan
+        nan 1.         1.         0.61237244]
+
+mean value: nan
+
+key: train_mcc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_accuracy 
+value: [0.66666667 0.83333333 0.83333333 0.5        0.5               nan
+        nan 1.         1.         0.8       ]
+
+mean value: nan
+
+key: train_accuracy 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_fscore 
+value: [0.66666667 0.85714286 0.85714286 0.57142857 0.4               nan
+        nan 1.         1.         0.85714286]
+
+mean value: nan
+
+key: train_fscore 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_precision 
+value: [0.66666667 0.75       0.75       0.5        0.5               nan
+        nan 1.         1.         0.75      ]
+
+mean value: nan
+
+key: train_precision 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_recall 
+value: [0.66666667 1.         1.         0.66666667 0.33333333        nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_recall 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_roc_auc 
+value: [0.66666667 0.83333333 0.83333333 0.5        0.5               nan
+        nan 1.         1.         0.75      ]
+
+mean value: nan
+
+key: train_roc_auc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+key: test_jcc 
+value: [0.5  0.75 0.75 0.4  0.25  nan  nan 1.   1.   0.75]
+
+mean value: nan
+
+key: train_jcc 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
+
+MCC on Blind test: 0.15
+
+Accuracy on Blind test: 0.6
+
+Model_name: Ridge ClassifierCV 
+Model func: RidgeClassifierCV(cv=10) 
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
+              gamma=0, gpu_id=-1, importance_type=None,
+              interaction_constraints='', learning_rate=0.300000012,
+              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
+              monotone_constraints='()', n_estimators=100, n_jobs=12,
+              num_parallel_tree=1, predictor='auto', random_state=42,
+              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
+              tree_method='exact', use_label_encoder=False,
+              validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['Other'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
+    return cache[method]
+KeyError: 'predict'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
+    scores = scorer(estimator, X_test, y_test)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
+    score = scorer._score(cached_call, estimator, *args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
+    y_pred = method_caller(estimator, "predict", X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
+    result = getattr(estimator, method)(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
+    Xt = transform.transform(Xt)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
+    Xs = self._fit_transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
+    return Parallel(n_jobs=self.n_jobs)(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
+    while self.dispatch_one_batch(iterator):
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
+    self._dispatch(tasks)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
+    job = self._backend.apply_async(batch, callback=cb)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
+    result = ImmediateResult(func)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
+    self.results = batch()
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
+    return [func(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
+    return self.function(*args, **kwargs)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
+    res = transformer.transform(X)
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
+    X_int, X_mask = self._transform(
+  File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
+    raise ValueError(msg)
+ValueError: Found unknown categories ['XDR'] in column 5 during transform
+
+  warnings.warn(
+/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:176: SettingWithCopyWarning: 
+A value is trying to be set on a copy of a slice from a DataFrame
+
+See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
+  rus_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
+/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:179: SettingWithCopyWarning: 
+A value is trying to be set on a copy of a slice from a DataFrame
+
+See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
+  rus_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
+Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model', RidgeClassifierCV(cv=10))])
+
+key: fit_time 
+value: [0.07766771 0.07815814 0.07832098 0.08276105 0.10114908 0.09942913
+ 0.08842373 0.0795567  0.10013795 0.07987666]
+
+mean value: 0.086548113822937
+
+key: score_time 
+value: [0.01170754 0.01169586 0.01166344 0.01162314 0.01418304 0.00657368
+ 0.00637412 0.01173091 0.01183128 0.01019073]
+
+mean value: 0.010757374763488769
+
+key: test_mcc 
+value: [ 0.          0.70710678  0.70710678 -0.33333333  0.                 nan
+         nan  1.          1.          0.61237244]
+
+mean value: nan
+
+key: train_mcc 
+value: [0.88527041 1.         1.         1.         1.         1.
+ 1.         1.         1.         1.        ]
+
+mean value: 0.9885270412757426
+
+key: test_accuracy 
+value: [0.5        0.83333333 0.83333333 0.33333333 0.5               nan
+        nan 1.         1.         0.8       ]
+
+mean value: nan
+
+key: train_accuracy 
+value: [0.94230769 1.         1.         1.         1.         1.
+ 1.         1.         1.         1.        ]
+
+mean value: 0.9942307692307693
+
+key: test_fscore 
+value: [0.4        0.85714286 0.85714286 0.33333333 0.4               nan
+        nan 1.         1.         0.85714286]
+
+mean value: nan
+
+key: train_fscore 
+value: [0.94117647 1.         1.         1.         1.         1.
+ 1.         1.         1.         1.        ]
+
+mean value: 0.9941176470588236
+
+key: test_precision 
+value: [0.5        0.75       0.75       0.33333333 0.5               nan
+        nan 1.         1.         0.75      ]
+
+mean value: nan
+
+key: train_precision 
+value: [0.96 1.   1.   1.   1.   1.   1.   1.   1.   1.  ]
+
+mean value: 0.996
+
+key: test_recall 
+value: [0.33333333 1.         1.         0.33333333 0.33333333        nan
+        nan 1.         1.         1.        ]
+
+mean value: nan
+
+key: train_recall 
+value: [0.92307692 1.         1.         1.         1.         1.
+ 1.         1.         1.         1.        ]
+
+mean value: 0.9923076923076923
+
+key: test_roc_auc 
+value: [0.5        0.83333333 0.83333333 0.33333333 0.5               nan
+        nan 1.         1.         0.75      ]
+
+mean value: nan
+
+key: train_roc_auc 
+value: [0.94230769 1.         1.         1.         1.         1.
+ 1.         1.         1.         1.        ]
+
+mean value: 0.9942307692307693
+
+key: test_jcc 
+value: [0.25 0.75 0.75 0.2  0.25  nan  nan 1.   1.   0.75]
+
+mean value: nan
+
+key: train_jcc 
+value: [0.88888889 1.         1.         1.         1.         1.
+ 1.         1.         1.         1.        ]
+
+mean value: 0.9888888888888889
+
+MCC on Blind test: 0.15
+
+Accuracy on Blind test: 0.6
+
+Model_name: Logistic Regression 
+Model func: LogisticRegression(random_state=42) 
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
+              colsample_bynode=None, colsample_bytree=None,
+              enable_categorical=False, gamma=None, gpu_id=None,
+              importance_type=None, interaction_constraints=None,
+              learning_rate=None, max_delta_step=None, max_depth=None,
+              min_child_weight=None, missing=nan, monotone_constraints=None,
+              n_estimators=100, n_jobs=None, num_parallel_tree=None,
+              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
+              scale_pos_weight=None, subsample=None, tree_method=None,
+              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model', LogisticRegression(random_state=42))])
+
+key: fit_time 
+value: [0.02820563 0.02576756 0.02641773 0.02557731 0.02693248 0.02358747
+ 0.0255456  0.02283812 0.02356815 0.02982664]
+
+mean value: 0.025826668739318846
+
+key: score_time 
+value: [0.01189876 0.01176596 0.01171589 0.01177764 0.01182795 0.01168847
+ 0.01162362 0.01171899 0.01163292 0.01207137]
+
+mean value: 0.01177215576171875
+
+key: test_mcc 
+value: [0.81649658 0.81649658 0.5        0.21821789 0.81649658 0.21821789
+ 0.6        0.81649658 0.81649658 1.        ]
+
+mean value: 0.6618918685110615
+
+key: train_mcc 
+value: [0.91201231 0.93356387 0.93356387 0.91201231 0.88910845 0.91111111
+ 0.88910845 0.88910845 0.95555556 0.88910845]
+
+mean value: 0.9114252823784718
+
+key: test_accuracy 
+value: [0.9 0.9 0.7 0.6 0.9 0.6 0.8 0.9 0.9 1. ]
+
+mean value: 0.8200000000000001
+
+key: train_accuracy 
+value: [0.95555556 0.96666667 0.96666667 0.95555556 0.94444444 0.95555556
+ 0.94444444 0.94444444 0.97777778 0.94444444]
+
+mean value: 0.9555555555555556
+
+key: test_fscore 
+value: [0.90909091 0.90909091 0.57142857 0.66666667 0.90909091 0.66666667
+ 0.8        0.90909091 0.88888889 1.        ]
+
+mean value: 0.823001443001443
+
+key: train_fscore 
+value: [0.95454545 0.96703297 0.96703297 0.95652174 0.94505495 0.95555556
+ 0.94382022 0.94505495 0.97777778 0.94382022]
+
+mean value: 0.955621680062325
+
+key: test_precision 
+value: [0.83333333 0.83333333 1.         0.57142857 0.83333333 0.57142857
+ 0.8        0.83333333 1.         1.        ]
+
+mean value: 0.8276190476190476
+
+key: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+train_precision 
+value: [0.97674419 0.95652174 0.95652174 0.93617021 0.93478261 0.95555556
+ 0.95454545 0.93478261 0.97777778 0.95454545]
+
+mean value: 0.9537947336888886
+
+key: test_recall 
+value: [1.  1.  0.4 0.8 1.  0.8 0.8 1.  0.8 1. ]
+
+mean value: 0.86
+
+key: train_recall 
+value: [0.93333333 0.97777778 0.97777778 0.97777778 0.95555556 0.95555556
+ 0.93333333 0.95555556 0.97777778 0.93333333]
+
+mean value: 0.9577777777777778
+
+key: test_roc_auc 
+value: [0.9 0.9 0.7 0.6 0.9 0.6 0.8 0.9 0.9 1. ]
+
+mean value: 0.8200000000000001
+
+key: train_roc_auc 
+value: [0.95555556 0.96666667 0.96666667 0.95555556 0.94444444 0.95555556
+ 0.94444444 0.94444444 0.97777778 0.94444444]
+
+mean value: 0.9555555555555556
+
+key: test_jcc 
+value: [0.83333333 0.83333333 0.4        0.5        0.83333333 0.5
+ 0.66666667 0.83333333 0.8        1.        ]
+
+mean value: 0.72
+
+key: train_jcc 
+value: [0.91304348 0.93617021 0.93617021 0.91666667 0.89583333 0.91489362
+ 0.89361702 0.89583333 0.95652174 0.89361702]
+
+mean value: 0.9152366635831021
+
+MCC on Blind test: 0.42
+
+Accuracy on Blind test: 0.75
+
+Model_name: Logistic RegressionCV 
+Model func: LogisticRegressionCV(random_state=42) 
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
+              colsample_bynode=None, colsample_bytree=None,
+              enable_categorical=False, gamma=None, gpu_id=None,
+              importance_type=None, interaction_constraints=None,
+              learning_rate=None, max_delta_step=None, max_depth=None,
+              min_child_weight=None, missing=nan, monotone_constraints=None,
+              n_estimators=100, n_jobs=None, num_parallel_tree=None,
+              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
+              scale_pos_weight=None, subsample=None, tree_method=None,
+              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model', LogisticRegressionCV(random_state=42))])
+
+key: fit_time 
+value: [0.80905724 0.7472713  0.62414098 0.64533067 0.73280716 0.58336401
+ 0.60610628 0.74971604 0.74895    0.61796379]
+
+mean value: 0.6864707469940186
+
+key: score_time 
+value: [0.01288104 0.01487541 0.01242852 0.01178455 0.01245666 0.01180792
+ 0.01207376 0.01235223 0.01500702 0.01522446]
+
+mean value: 0.013089156150817871
+
+key: test_mcc 
+value: [0.65465367 0.81649658 0.21821789 0.5        0.81649658 0.21821789
+ 0.81649658 0.81649658 0.6        1.        ]
+
+mean value: 0.6457075774890866
+
+key: train_mcc 
+value: [1.         1.         0.95555556 1.         0.95555556 0.95555556
+ 0.93356387 1.         1.         1.        ]
+
+mean value: 0.980023053806288
+
+key: test_accuracy 
+value: [0.8 0.9 0.6 0.7 0.9 0.6 0.9 0.9 0.8 1. ]
+
+mean value: 0.81
+
+key: train_accuracy 
+value: [1.         1.         0.97777778 1.         0.97777778 0.97777778
+ 0.96666667 1.         1.         1.        ]
+
+mean value: 0.99
+
+key: test_fscore 
+value: [0.83333333 0.90909091 0.5        0.76923077 0.90909091 0.66666667
+ 0.88888889 0.90909091 0.8        1.        ]
+
+mean value: 0.8185392385392385
+
+key: train_fscore 
+value: [1.         1.         0.97777778 1.         0.97777778 0.97777778
+ 0.96629213 1.         1.         1.        ]
+
+mean value: 0.9899625468164794
+
+key: test_precision 
+value: [0.71428571 0.83333333 0.66666667 0.625      0.83333333 0.57142857
+ 1.         0.83333333 0.8        1.        ]
+
+mean value: 0.7877380952380952
+
+key: train_precision 
+value: [1.         1.         0.97777778 1.         0.97777778 0.97777778
+ 0.97727273 1.         1.         1.        ]
+
+mean value: 0.9910606060606061
+
+key: test_recall 
+value: [1.  1.  0.4 1.  1.  0.8 0.8 1.  0.8 1. ]
+
+mean value: 0.88
+
+key: train_recall 
+value: [1.         1.         0.97777778 1.         0.97777778 0.97777778
+ 0.95555556 1.         1.         1.        ]
+
+mean value: 0.9888888888888889
+
+key: test_roc_auc 
+value: [0.8 0.9 0.6 0.7 0.9 0.6 0.9 0.9 0.8 1. ]
+
+mean value: 0.81
+
+key: train_roc_auc 
+value: [1.         1.         0.97777778 1.         0.97777778 0.97777778
+ 0.96666667 1.         1.         1.        ]
+
+mean value: 0.99
+
+key: test_jcc 
+value: [0.71428571 0.83333333 0.33333333 0.625      0.83333333 0.5
+ 0.8        0.83333333 0.66666667 1.        ]
+
+mean value: 0.7139285714285715
+
+key: train_jcc 
+value: [1.         1.         0.95652174 1.         0.95652174 0.95652174
+ 0.93478261 1.         1.         1.        ]
+
+mean value: 0.9804347826086957
+
+MCC on Blind test: 0.48
+
+Accuracy on Blind test: 0.78
+
+Model_name: Gaussian NB 
+Model func: GaussianNB() 
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
+              colsample_bynode=None, colsample_bytree=None,
+              enable_categorical=False, gamma=None, gpu_id=None,
+              importance_type=None, interaction_constraints=None,
+              learning_rate=None, max_delta_step=None, max_depth=None,
+              min_child_weight=None, missing=nan, monotone_constraints=None,
+              n_estimators=100, n_jobs=None, num_parallel_tree=None,
+              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
+              scale_pos_weight=None, subsample=None, tree_method=None,
+              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model', GaussianNB())])
+
+key: fit_time 
+value: [0.0124836  0.00930858 0.00983167 0.00957084 0.00956893 0.00951123
+ 0.00926447 0.0093565  0.00924706 0.00938511]
+
+mean value: 0.009752798080444335
+
+key: score_time 
+value: [0.01172256 0.00908756 0.00973558 0.00952482 0.00956821 0.0095439
+ 0.0094943  0.00932264 0.00934172 0.00925255]
+
+mean value: 0.009659385681152344
+
+key: test_mcc 
+value: [0.65465367 0.5        0.21821789 0.21821789 0.6        0.
+ 0.40824829 0.40824829 0.65465367 0.6       ]
+
+mean value: 0.4262239702815665
+
+key: train_mcc 
+value: [0.77777778 0.57642872 0.56980288 0.75724019 0.55708601 0.67488191
+ 0.67488191 0.65025037 0.5500191  0.65487619]
+
+mean value: 0.6443245048315153
+
+key: test_accuracy 
+value: [0.8 0.7 0.6 0.6 0.8 0.5 0.7 0.7 0.8 0.8]
 
 mean value: 0.7
 
-key: train_recall 
-value: [0.91111111 0.93333333 0.84444444 0.93333333 0.88888889 0.84444444
- 0.8        0.88888889 0.86666667 0.8       ]
+key: train_accuracy 
+value: [0.88888889 0.77777778 0.77777778 0.87777778 0.76666667 0.83333333
+ 0.83333333 0.82222222 0.76666667 0.82222222]
 
-mean value: 0.8711111111111112
+mean value: 0.8166666666666667
+
+key: test_fscore 
+value: [0.83333333 0.76923077 0.5        0.66666667 0.8        0.66666667
+ 0.72727273 0.72727273 0.83333333 0.8       ]
+
+mean value: 0.7323776223776224
+
+key: train_fscore 
+value: [0.88888889 0.80392157 0.8        0.88172043 0.7961165  0.84536082
+ 0.84536082 0.83333333 0.79207921 0.83673469]
+
+mean value: 0.8323516277094448
+
+key: test_precision 
+value: [0.71428571 0.625      0.66666667 0.57142857 0.8        0.5
+ 0.66666667 0.66666667 0.71428571 0.8       ]
+
+mean value: 0.6725
+
+key: train_precision 
+value: [0.88888889 0.71929825 0.72727273 0.85416667 0.70689655 0.78846154
+ 0.78846154 0.78431373 0.71428571 0.77358491]
+
+mean value: 0.774563050252582
+
+key: test_recall 
+value: [1.  1.  0.4 0.8 0.8 1.  0.8 0.8 1.  0.8]
+
+mean value: 0.8400000000000001
+
+key: train_recall 
+value: [0.88888889 0.91111111 0.88888889 0.91111111 0.91111111 0.91111111
+ 0.91111111 0.88888889 0.88888889 0.91111111]
+
+mean value: 0.9022222222222221
 
 key: test_roc_auc 
-value: [0.9 0.8 0.6 0.6 0.8 0.8 0.7 0.8 0.6 0.7]
+value: [0.8 0.7 0.6 0.6 0.8 0.5 0.7 0.7 0.8 0.8]
 
-mean value: 0.73
+mean value: 0.7000000000000001
 
 key: train_roc_auc 
-value: [0.93333333 0.93333333 0.92222222 0.93333333 0.92222222 0.92222222
- 0.88888889 0.92222222 0.91111111 0.9       ]
+value: [0.88888889 0.77777778 0.77777778 0.87777778 0.76666667 0.83333333
+ 0.83333333 0.82222222 0.76666667 0.82222222]
 
-mean value: 0.9188888888888889
+mean value: 0.8166666666666667
 
 key: test_jcc 
-value: [0.83333333 0.66666667 0.33333333 0.5        0.66666667 0.6
- 0.5        0.71428571 0.42857143 0.4       ]
+value: [0.71428571 0.625      0.33333333 0.5        0.66666667 0.5
+ 0.57142857 0.57142857 0.71428571 0.66666667]
 
-mean value: 0.5642857142857143
+mean value: 0.5863095238095238
 
 key: train_jcc 
-value: [0.87234043 0.875      0.84444444 0.875      0.85106383 0.84444444
- 0.7826087  0.85106383 0.82978723 0.8       ]
+value: [0.8        0.67213115 0.66666667 0.78846154 0.66129032 0.73214286
+ 0.73214286 0.71428571 0.6557377  0.71929825]
 
-mean value: 0.8425752903689999
+mean value: 0.714215705435333
 
-MCC on Blind test: 0.37
+MCC on Blind test: 0.07
 
-Accuracy on Blind test: 0.72
+Accuracy on Blind test: 0.52
+
+Model_name: Naive Bayes 
+Model func: BernoulliNB() 
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
+              colsample_bynode=None, colsample_bytree=None,
+              enable_categorical=False, gamma=None, gpu_id=None,
+              importance_type=None, interaction_constraints=None,
+              learning_rate=None, max_delta_step=None, max_depth=None,
+              min_child_weight=None, missing=nan, monotone_constraints=None,
+              n_estimators=100, n_jobs=None, num_parallel_tree=None,
+              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
+              scale_pos_weight=None, subsample=None, tree_method=None,
+              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model', BernoulliNB())])
+
+key: fit_time 
+value: [0.01002598 0.00972629 0.00943542 0.009835   0.00988364 0.00961804
+ 0.00996184 0.00956464 0.00955367 0.00944734]
+
+mean value: 0.009705185890197754
+
+key: score_time 
+value: [0.0102427  0.00930071 0.00941706 0.00960851 0.00960922 0.00931358
+ 0.00965929 0.00924778 0.00924873 0.00928211]
+
+mean value: 0.009492969512939453
+
+key: test_mcc 
+value: [0.81649658 0.40824829 0.         0.40824829 0.40824829 0.6
+ 0.40824829 0.2        0.40824829 0.        ]
+
+mean value: 0.3657738033247041
+
+key: train_mcc 
+value: [0.62609903 0.58137767 0.60540551 0.58137767 0.62360956 0.71269665
+ 0.60238451 0.58969198 0.6681531  0.68957028]
+
+mean value: 0.6280365978811131
+
+key: test_accuracy 
+value: [0.9 0.7 0.5 0.7 0.7 0.8 0.7 0.6 0.7 0.5]
+
+mean value: 0.6799999999999999
+
+key: train_accuracy 
+value: [0.81111111 0.78888889 0.8        0.78888889 0.81111111 0.85555556
+ 0.8        0.78888889 0.83333333 0.84444444]
+
+mean value: 0.8122222222222222
+
+key: test_fscore 
+value: [0.90909091 0.66666667 0.28571429 0.72727273 0.66666667 0.8
+ 0.72727273 0.6        0.66666667 0.44444444]
+
+mean value: 0.6493795093795094
+
+key: train_fscore 
+value: [0.8        0.77647059 0.78571429 0.77647059 0.8045977  0.85057471
+ 0.79069767 0.7654321  0.82758621 0.84090909]
+
+mean value: 0.8018452946967656
+
+key: test_precision 
+value: [0.83333333 0.75       0.5        0.66666667 0.75       0.8
+ 0.66666667 0.6        0.75       0.5       ]
+
+mean value: 0.6816666666666666
+
+key: train_precision 
+value: [0.85       0.825      0.84615385 0.825      0.83333333 0.88095238
+ 0.82926829 0.86111111 0.85714286 0.86046512]
+
+mean value: 0.8468426937655525
+
+key: test_recall 
+value: [1.  0.6 0.2 0.8 0.6 0.8 0.8 0.6 0.6 0.4]
+
+mean value: 0.64
+
+key: train_recall 
+value: [0.75555556 0.73333333 0.73333333 0.73333333 0.77777778 0.82222222
+ 0.75555556 0.68888889 0.8        0.82222222]
+
+mean value: 0.7622222222222222
+
+key: test_roc_auc 
+value: [0.9 0.7 0.5 0.7 0.7 0.8 0.7 0.6 0.7 0.5]
+
+mean value: 0.68
+
+key: train_roc_auc 
+value: [0.81111111 0.78888889 0.8        0.78888889 0.81111111 0.85555556
+ 0.8        0.78888889 0.83333333 0.84444444]
+
+mean value: 0.8122222222222222
+
+key: test_jcc 
+value: [0.83333333 0.5        0.16666667 0.57142857 0.5        0.66666667
+ 0.57142857 0.42857143 0.5        0.28571429]
+
+mean value: 0.5023809523809524
+
+key: train_jcc 
+value: [0.66666667 0.63461538 0.64705882 0.63461538 0.67307692 0.74
+ 0.65384615 0.62       0.70588235 0.7254902 ]
+
+mean value: 0.6701251885369532
+
+MCC on Blind test: 0.18
+
+Accuracy on Blind test: 0.65
+
+Model_name: K-Nearest Neighbors 
+Model func: KNeighborsClassifier() 
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
+              colsample_bynode=None, colsample_bytree=None,
+              enable_categorical=False, gamma=None, gpu_id=None,
+              importance_type=None, interaction_constraints=None,
+              learning_rate=None, max_delta_step=None, max_depth=None,
+              min_child_weight=None, missing=nan, monotone_constraints=None,
+              n_estimators=100, n_jobs=None, num_parallel_tree=None,
+              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
+              scale_pos_weight=None, subsample=None, tree_method=None,
+              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model', KNeighborsClassifier())])
+
+key: fit_time 
+value: [0.00937963 0.0080359  0.00806713 0.00866961 0.00906754 0.00908542
+ 0.00891066 0.00935316 0.009197   0.00908399]
+
+mean value: 0.008885002136230469
+
+key: score_time 
+value: [0.00974846 0.0091846  0.00914931 0.00993633 0.0101018  0.00967264
+ 0.01007986 0.01016092 0.01003218 0.00999641]
+
+mean value: 0.009806251525878907
+
+key: test_mcc 
+value: [ 0.81649658  0.2         0.21821789 -0.21821789  0.          0.40824829
+  0.40824829  0.6        -0.6         0.5       ]
+
+mean value: 0.2332993161855452
+
+key: train_mcc 
+value: [0.64700558 0.6        0.64700558 0.51111111 0.64508188 0.69162666
+ 0.62360956 0.55555556 0.48900965 0.62237591]
+
+mean value: 0.6032381499326708
+
+key: test_accuracy 
+value: [0.9 0.6 0.6 0.4 0.5 0.7 0.7 0.8 0.2 0.7]
+
+mean value: 0.61
+
+key: train_accuracy 
+value: [0.82222222 0.8        0.82222222 0.75555556 0.82222222 0.84444444
+ 0.81111111 0.77777778 0.74444444 0.81111111]
+
+mean value: 0.8011111111111111
+
+key: test_fscore 
+value: [0.90909091 0.6        0.5        0.5        0.54545455 0.72727273
+ 0.72727273 0.8        0.2        0.57142857]
+
+mean value: 0.6080519480519481
+
+key: train_fscore 
+value: [0.82978723 0.8        0.82978723 0.75555556 0.82608696 0.85106383
+ 0.8045977  0.77777778 0.74157303 0.80898876]
+
+mean value: 0.8025218086629647
+
+key: test_precision 
+value: [0.83333333 0.6        0.66666667 0.42857143 0.5        0.66666667
+ 0.66666667 0.8        0.2        1.        ]
+
+mean value: 0.6361904761904762
+
+key: train_precision 
+value: [0.79591837 0.8        0.79591837 0.75555556 0.80851064 0.81632653
+ 0.83333333 0.77777778 0.75       0.81818182]
+
+mean value: 0.795152238845248
+
+key: test_recall 
+value: [1.  0.6 0.4 0.6 0.6 0.8 0.8 0.8 0.2 0.4]
+
+mean value: 0.62
+
+key: train_recall 
+value: [0.86666667 0.8        0.86666667 0.75555556 0.84444444 0.88888889
+ 0.77777778 0.77777778 0.73333333 0.8       ]
+
+mean value: 0.8111111111111111
+
+key: test_roc_auc 
+value: [0.9 0.6 0.6 0.4 0.5 0.7 0.7 0.8 0.2 0.7]
+
+mean value: 0.61
+
+key: train_roc_auc 
+value: [0.82222222 0.8        0.82222222 0.75555556 0.82222222 0.84444444
+ 0.81111111 0.77777778 0.74444444 0.81111111]
+
+mean value: 0.8011111111111111
+
+key: test_jcc 
+value: [0.83333333 0.42857143 0.33333333 0.33333333 0.375      0.57142857
+ 0.57142857 0.66666667 0.11111111 0.4       ]
+
+mean value: 0.4624206349206349
+
+key: train_jcc 
+value: [0.70909091 0.66666667 0.70909091 0.60714286 0.7037037  0.74074074
+ 0.67307692 0.63636364 0.58928571 0.67924528]
+
+mean value: 0.6714407343180928
+
+MCC on Blind test: 0.0
+
+Accuracy on Blind test: 0.5
+
+Model_name: SVM 
+Model func: SVC(random_state=42) 
+List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+                       n_estimators=1000, n_jobs=10, oob_score=True,
+                       random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
+              colsample_bynode=None, colsample_bytree=None,
+              enable_categorical=False, gamma=None, gpu_id=None,
+              importance_type=None, interaction_constraints=None,
+              learning_rate=None, max_delta_step=None, max_depth=None,
+              min_child_weight=None, missing=nan, monotone_constraints=None,
+              n_estimators=100, n_jobs=None, num_parallel_tree=None,
+              predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
+              scale_pos_weight=None, subsample=None, tree_method=None,
+              use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
+Running model pipeline: Pipeline(steps=[('prep',
+                 ColumnTransformer(remainder='passthrough',
+                                   transformers=[('num', MinMaxScaler(),
+                                                  Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
+       'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
+       'mcsm_na_affinity', 'rsa',
+       ...
+       'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
+       'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
+      dtype='object', length=167)),
+                                                 ('cat', OneHotEncoder(),
+                                                  Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
+       'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
+      dtype='object'))])),
+                ('model', SVC(random_state=42))])
+
+key: fit_time 
+value: [0.01032925 0.01031327 0.01048875 0.0095973  0.00918651 0.01025271
+ 0.01038647 0.00908804 0.00948262 0.01041341]
+
+mean value: 0.009953832626342774
+
+key: score_time 
+value: [0.00930023 0.0093739  0.00958729 0.00855732 0.00871849 0.00915527
+ 0.00932598 0.00866413 0.00870752 0.00956011]
+
+mean value: 0.009095025062561036
+
+key: test_mcc 
+value: [0.81649658 0.81649658 0.21821789 0.6        0.40824829 0.40824829
+ 0.40824829 0.6        0.6        0.65465367]
+
+mean value: 0.553060959419101
+
+key: train_mcc 
+value: [0.8675239  0.84632727 0.84465303 0.84465303 0.88910845 0.93356387
+ 0.8230355  0.86666667 0.84465303 0.78086881]
+
+mean value: 0.854105354455519
+
+key: test_accuracy 
+value: [0.9 0.9 0.6 0.8 0.7 0.7 0.7 0.8 0.8 0.8]
+
+mean value: 0.77
+
+key: train_accuracy 
+value: [0.93333333 0.92222222 0.92222222 0.92222222 0.94444444 0.96666667
+ 0.91111111 0.93333333 0.92222222 0.88888889]
+
+mean value: 0.9266666666666666
+
+key: test_fscore 
+value: [0.90909091 0.90909091 0.5        0.8        0.66666667 0.72727273
+ 0.72727273 0.8        0.8        0.75      ]
+
+mean value: 0.7589393939393939
+
+key: train_fscore 
+value: [0.93181818 0.91954023 0.92134831 0.92307692 0.94382022 0.96629213
+ 0.90909091 0.93333333 0.92307692 0.88372093]
+
+mean value: 0.925511810467119
+
+key: test_precision 
+value: [0.83333333 0.83333333 0.66666667 0.8        0.75       0.66666667
+ 0.66666667 0.8        0.8        1.        ]
+
+mean value: 0.7816666666666667
+
+key: train_precision 
+value: [0.95348837 0.95238095 0.93181818 0.91304348 0.95454545 0.97727273
+ 0.93023256 0.93333333 0.91304348 0.92682927]
+
+mean value: 0.938598780439763
+
+key: test_recall 
+value: [1.  1.  0.4 0.8 0.6 0.8 0.8 0.8 0.8 0.6]
+
+mean value: 0.76
+
+key: train_recall 
+value: [0.91111111 0.88888889 0.91111111 0.93333333 0.93333333 0.95555556
+ 0.88888889 0.93333333 0.93333333 0.84444444]
+
+mean value: 0.9133333333333333
+
+key: test_roc_auc 
+value: [0.9 0.9 0.6 0.8 0.7 0.7 0.7 0.8 0.8 0.8]
+
+mean value: 0.77
+
+key: train_roc_auc 
+value: [0.93333333 0.92222222 0.92222222 0.92222222 0.94444444 0.96666667
+ 0.91111111 0.93333333 0.92222222 0.88888889]
+
+mean value: 0.9266666666666666
+
+key: test_jcc 
+value: [0.83333333 0.83333333 0.33333333 0.66666667 0.5        0.57142857
+ 0.57142857 0.66666667 0.66666667 0.6       ]
+
+mean value: 0.6242857142857142
+
+key: train_jcc 
+value: [0.87234043 0.85106383 0.85416667 0.85714286 0.89361702 0.93478261
+ 0.83333333 0.875      0.85714286 0.79166667]
+
+mean value: 0.8620256266243778
+
+MCC on Blind test: 0.21
+
+Accuracy on Blind test: 0.65
 
 Model_name: MLP 
 Model func: MLPClassifier(max_iter=500, random_state=42) 
@@ -23033,22 +23238,22 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', MLPClassifier(max_iter=500, random_state=42))])
 
 key: fit_time 
-value: [0.39881611 0.41650391 0.42689157 0.3460288  0.48951459 0.41404629
- 0.42462683 0.40188336 0.39558363 0.48087025]
+value: [0.37708092 0.3637991  0.45402408 0.35582471 0.38116431 0.36729598
+ 0.41565084 0.37348127 0.51866865 0.39506054]
 
-mean value: 0.4194765329360962
+mean value: 0.40020503997802737
 
 key: score_time 
-value: [0.01196933 0.01197171 0.01200271 0.01200986 0.01196694 0.01200986
- 0.01199579 0.01200604 0.01202822 0.01213503]
+value: [0.01199031 0.01207948 0.0120008  0.01200986 0.01195621 0.01205134
+ 0.01203179 0.01206517 0.01206875 0.0120163 ]
 
-mean value: 0.012009549140930175
+mean value: 0.01202700138092041
 
 key: test_mcc 
-value: [0.81649658 0.81649658 1.         0.40824829 0.81649658 0.6
- 0.40824829 0.5        0.81649658 0.40824829]
+value: [0.65465367 0.81649658 0.40824829 0.40824829 0.6        0.65465367
+ 0.81649658 0.81649658 0.40824829 1.        ]
 
-mean value: 0.6590731195102493
+mean value: 0.6583541955590722
 
 key: train_mcc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23056,9 +23261,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_accuracy 
-value: [0.9 0.9 1.  0.7 0.9 0.8 0.7 0.7 0.9 0.7]
+value: [0.8 0.9 0.7 0.7 0.8 0.8 0.9 0.9 0.7 1. ]
 
-mean value: 0.82
+mean value: 0.8200000000000001
 
 key: train_accuracy 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23066,10 +23271,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_fscore 
-value: [0.90909091 0.90909091 1.         0.72727273 0.88888889 0.8
- 0.72727273 0.76923077 0.88888889 0.66666667]
+value: [0.83333333 0.90909091 0.66666667 0.72727273 0.8        0.83333333
+ 0.88888889 0.90909091 0.72727273 1.        ]
 
-mean value: 0.8286402486402487
+mean value: 0.8294949494949495
 
 key: train_fscore 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23077,10 +23282,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_precision 
-value: [0.83333333 0.83333333 1.         0.66666667 1.         0.8
- 0.66666667 0.625      1.         0.75      ]
+value: [0.71428571 0.83333333 0.75       0.66666667 0.8        0.71428571
+ 1.         0.83333333 0.66666667 1.        ]
 
-mean value: 0.8175
+mean value: 0.7978571428571428
 
 key: train_precision 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23088,9 +23293,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_recall 
-value: [1.  1.  1.  0.8 0.8 0.8 0.8 1.  0.8 0.6]
+value: [1.  1.  0.6 0.8 0.8 1.  0.8 1.  0.8 1. ]
 
-mean value: 0.86
+mean value: 0.88
 
 key: train_recall 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23098,7 +23303,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_roc_auc 
-value: [0.9 0.9 1.  0.7 0.9 0.8 0.7 0.7 0.9 0.7]
+value: [0.8 0.9 0.7 0.7 0.8 0.8 0.9 0.9 0.7 1. ]
 
 mean value: 0.8200000000000001
 
@@ -23108,10 +23313,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_jcc 
-value: [0.83333333 0.83333333 1.         0.57142857 0.8        0.66666667
- 0.57142857 0.625      0.8        0.5       ]
+value: [0.71428571 0.83333333 0.5        0.57142857 0.66666667 0.71428571
+ 0.8        0.83333333 0.57142857 1.        ]
 
-mean value: 0.7201190476190477
+mean value: 0.7204761904761905
 
 key: train_jcc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23153,22 +23358,22 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', DecisionTreeClassifier(random_state=42))])
 
 key: fit_time 
-value: [0.01629376 0.01320314 0.01022458 0.00981021 0.00979257 0.00974083
- 0.00949192 0.0102129  0.00986719 0.00993228]
+value: [0.01760101 0.01309419 0.01132631 0.01077938 0.010427   0.01067376
+ 0.01073265 0.01048017 0.01065993 0.01080036]
 
-mean value: 0.010856938362121583
+mean value: 0.011657476425170898
 
 key: score_time 
-value: [0.0115726  0.00897956 0.0086844  0.00850129 0.00838923 0.00844407
- 0.00838757 0.00920272 0.00845051 0.00858712]
+value: [0.01169825 0.00983357 0.00959349 0.00917745 0.00919628 0.00912952
+ 0.00917816 0.00921082 0.00917602 0.00917816]
 
-mean value: 0.008919906616210938
+mean value: 0.009537172317504884
 
 key: test_mcc 
-value: [0.81649658 0.81649658 0.81649658 0.65465367 0.81649658 0.81649658
- 0.81649658 0.81649658 0.6        0.81649658]
+value: [0.81649658 0.65465367 1.         0.81649658 0.81649658 1.
+ 0.40824829 0.81649658 0.65465367 1.        ]
 
-mean value: 0.7786626318129786
+mean value: 0.7983541955590722
 
 key: train_mcc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23176,9 +23381,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_accuracy 
-value: [0.9 0.9 0.9 0.8 0.9 0.9 0.9 0.9 0.8 0.9]
+value: [0.9 0.8 1.  0.9 0.9 1.  0.7 0.9 0.8 1. ]
 
-mean value: 0.88
+mean value: 0.89
 
 key: train_accuracy 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23186,10 +23391,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_fscore 
-value: [0.90909091 0.90909091 0.90909091 0.83333333 0.88888889 0.90909091
- 0.88888889 0.90909091 0.8        0.90909091]
+value: [0.90909091 0.83333333 1.         0.90909091 0.88888889 1.
+ 0.72727273 0.90909091 0.83333333 1.        ]
 
-mean value: 0.8865656565656566
+mean value: 0.901010101010101
 
 key: train_fscore 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23197,10 +23402,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_precision 
-value: [0.83333333 0.83333333 0.83333333 0.71428571 1.         0.83333333
- 1.         0.83333333 0.8        0.83333333]
+value: [0.83333333 0.71428571 1.         0.83333333 1.         1.
+ 0.66666667 0.83333333 0.71428571 1.        ]
 
-mean value: 0.8514285714285714
+mean value: 0.8595238095238096
 
 key: train_precision 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23208,9 +23413,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_recall 
-value: [1.  1.  1.  1.  0.8 1.  0.8 1.  0.8 1. ]
+value: [1.  1.  1.  1.  0.8 1.  0.8 1.  1.  1. ]
 
-mean value: 0.9400000000000001
+mean value: 0.96
 
 key: train_recall 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23218,9 +23423,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_roc_auc 
-value: [0.9 0.9 0.9 0.8 0.9 0.9 0.9 0.9 0.8 0.9]
+value: [0.9 0.8 1.  0.9 0.9 1.  0.7 0.9 0.8 1. ]
 
-mean value: 0.88
+mean value: 0.89
 
 key: train_roc_auc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23228,10 +23433,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_jcc 
-value: [0.83333333 0.83333333 0.83333333 0.71428571 0.8        0.83333333
- 0.8        0.83333333 0.66666667 0.83333333]
+value: [0.83333333 0.71428571 1.         0.83333333 0.8        1.
+ 0.57142857 0.83333333 0.71428571 1.        ]
 
-mean value: 0.7980952380952381
+mean value: 0.8300000000000001
 
 key: train_jcc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23273,22 +23478,22 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', ExtraTreesClassifier(random_state=42))])
 
 key: fit_time 
-value: [0.0893023  0.08990002 0.08999562 0.0806818  0.08832645 0.08767629
- 0.08837485 0.08918786 0.0886693  0.0875802 ]
+value: [0.08497882 0.08490729 0.08530784 0.08088517 0.07980752 0.08026528
+ 0.08012891 0.07985377 0.0795064  0.07973003]
 
-mean value: 0.08796947002410889
+mean value: 0.08153710365295411
 
 key: score_time 
-value: [0.01877093 0.01870561 0.01703525 0.01690221 0.01881409 0.01816916
- 0.01854229 0.01852489 0.01844764 0.01862383]
+value: [0.01674938 0.01818061 0.0167861  0.01685762 0.01680422 0.01698256
+ 0.01682544 0.01685238 0.01678157 0.01684117]
 
-mean value: 0.018253588676452638
+mean value: 0.01696610450744629
 
 key: test_mcc 
-value: [0.40824829 0.81649658 0.81649658 0.21821789 0.6        0.81649658
- 0.65465367 0.81649658 0.81649658 0.5       ]
+value: [0.65465367 0.65465367 0.65465367 0.81649658 0.65465367 0.40824829
+ 0.6        0.81649658 0.6        1.        ]
 
-mean value: 0.6463602756046463
+mean value: 0.6859856135151223
 
 key: train_mcc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23296,9 +23501,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_accuracy 
-value: [0.7 0.9 0.9 0.6 0.8 0.9 0.8 0.9 0.9 0.7]
+value: [0.8 0.8 0.8 0.9 0.8 0.7 0.8 0.9 0.8 1. ]
 
-mean value: 0.81
+mean value: 0.8300000000000001
 
 key: train_accuracy 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23306,10 +23511,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_fscore 
-value: [0.72727273 0.90909091 0.90909091 0.66666667 0.8        0.88888889
- 0.75       0.90909091 0.88888889 0.57142857]
+value: [0.83333333 0.83333333 0.75       0.90909091 0.75       0.72727273
+ 0.8        0.88888889 0.8        1.        ]
 
-mean value: 0.802041847041847
+mean value: 0.8291919191919191
 
 key: train_fscore 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23317,10 +23522,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_precision 
-value: [0.66666667 0.83333333 0.83333333 0.57142857 0.8        1.
- 1.         0.83333333 1.         1.        ]
+value: [0.71428571 0.71428571 1.         0.83333333 1.         0.66666667
+ 0.8        1.         0.8        1.        ]
 
-mean value: 0.8538095238095238
+mean value: 0.8528571428571429
 
 key: train_precision 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23328,9 +23533,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_recall 
-value: [0.8 1.  1.  0.8 0.8 0.8 0.6 1.  0.8 0.4]
+value: [1.  1.  0.6 1.  0.6 0.8 0.8 0.8 0.8 1. ]
 
-mean value: 0.8
+mean value: 0.84
 
 key: train_recall 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23338,9 +23543,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_roc_auc 
-value: [0.7 0.9 0.9 0.6 0.8 0.9 0.8 0.9 0.9 0.7]
+value: [0.8 0.8 0.8 0.9 0.8 0.7 0.8 0.9 0.8 1. ]
 
-mean value: 0.81
+mean value: 0.8300000000000001
 
 key: train_roc_auc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23348,19 +23553,19 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_jcc 
-value: [0.57142857 0.83333333 0.83333333 0.5        0.66666667 0.8
- 0.6        0.83333333 0.8        0.4       ]
+value: [0.71428571 0.71428571 0.6        0.83333333 0.6        0.57142857
+ 0.66666667 0.8        0.66666667 1.        ]
 
-mean value: 0.6838095238095239
+mean value: 0.7166666666666667
 
 key: train_jcc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 
 mean value: 1.0
 
-MCC on Blind test: 0.36
+MCC on Blind test: 0.49
 
-Accuracy on Blind test: 0.72
+Accuracy on Blind test: 0.78
 
 Model_name: Extra Tree 
 Model func: ExtraTreeClassifier(random_state=42) 
@@ -23393,22 +23598,22 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', ExtraTreeClassifier(random_state=42))])
 
 key: fit_time 
-value: [0.00986981 0.00918651 0.00949073 0.00941229 0.00946736 0.00948048
- 0.00914884 0.00917602 0.00864077 0.0092082 ]
+value: [0.00878644 0.00890398 0.0085156  0.00929928 0.00923872 0.00895572
+ 0.00925231 0.00933337 0.00873685 0.00847316]
 
-mean value: 0.009308099746704102
+mean value: 0.008949542045593261
 
 key: score_time 
-value: [0.00936985 0.00940537 0.00926566 0.00926495 0.00862026 0.00878453
- 0.00922108 0.00850725 0.00934005 0.00925589]
+value: [0.00883341 0.00892162 0.00849938 0.00916171 0.00890326 0.00888228
+ 0.00898838 0.00917315 0.00845098 0.00846076]
 
-mean value: 0.009103488922119141
+mean value: 0.008827495574951171
 
 key: test_mcc 
-value: [0.65465367 0.5        1.         0.5        0.6        0.
- 0.81649658 0.5        0.6        0.5       ]
+value: [0.81649658 0.65465367 0.40824829 0.81649658 0.6        0.21821789
+ 0.65465367 0.6        0.40824829 0.        ]
 
-mean value: 0.5671150251635704
+mean value: 0.5177014974435125
 
 key: train_mcc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23416,9 +23621,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_accuracy 
-value: [0.8 0.7 1.  0.7 0.8 0.5 0.9 0.7 0.8 0.7]
+value: [0.9 0.8 0.7 0.9 0.8 0.6 0.8 0.8 0.7 0.5]
 
-mean value: 0.76
+mean value: 0.75
 
 key: train_accuracy 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23426,10 +23631,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_fscore 
-value: [0.83333333 0.76923077 1.         0.76923077 0.8        0.54545455
- 0.88888889 0.76923077 0.8        0.57142857]
+value: [0.90909091 0.83333333 0.72727273 0.90909091 0.8        0.66666667
+ 0.83333333 0.8        0.72727273 0.54545455]
 
-mean value: 0.7746797646797647
+mean value: 0.7751515151515151
 
 key: train_fscore 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23437,10 +23642,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_precision 
-value: [0.71428571 0.625      1.         0.625      0.8        0.5
- 1.         0.625      0.8        1.        ]
+value: [0.83333333 0.71428571 0.66666667 0.83333333 0.8        0.57142857
+ 0.71428571 0.8        0.66666667 0.5       ]
 
-mean value: 0.7689285714285714
+mean value: 0.71
 
 key: train_precision 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23448,9 +23653,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_recall 
-value: [1.  1.  1.  1.  0.8 0.6 0.8 1.  0.8 0.4]
+value: [1.  1.  0.8 1.  0.8 0.8 1.  0.8 0.8 0.6]
 
-mean value: 0.84
+mean value: 0.86
 
 key: train_recall 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23458,9 +23663,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_roc_auc 
-value: [0.8 0.7 1.  0.7 0.8 0.5 0.9 0.7 0.8 0.7]
+value: [0.9 0.8 0.7 0.9 0.8 0.6 0.8 0.8 0.7 0.5]
 
-mean value: 0.76
+mean value: 0.75
 
 key: train_roc_auc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23468,19 +23673,19 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_jcc 
-value: [0.71428571 0.625      1.         0.625      0.66666667 0.375
- 0.8        0.625      0.66666667 0.4       ]
+value: [0.83333333 0.71428571 0.57142857 0.83333333 0.66666667 0.5
+ 0.71428571 0.66666667 0.57142857 0.375     ]
 
-mean value: 0.6497619047619048
+mean value: 0.6446428571428572
 
 key: train_jcc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 
 mean value: 1.0
 
-MCC on Blind test: -0.04
+MCC on Blind test: 0.03
 
-Accuracy on Blind test: 0.48
+Accuracy on Blind test: 0.5
 
 Model_name: Random Forest 
 Model func: RandomForestClassifier(n_estimators=1000, random_state=42) 
@@ -23534,22 +23739,22 @@ Pipeline(steps=[('prep',
                  RandomForestClassifier(n_estimators=1000, random_state=42))])
 
 key: fit_time 
-value: [1.15932965 1.05107379 1.05955648 1.02203918 1.01323223 1.02794361
- 1.01532245 1.02316165 1.02073669 1.01626611]
+value: [1.02410698 1.02078891 1.02329874 1.02577353 1.02832651 1.0975976
+ 1.09256458 1.01587534 1.0089438  1.00883484]
 
-mean value: 1.0408661842346192
+mean value: 1.0346110820770265
 
 key: score_time 
-value: [0.09322572 0.09367871 0.09359598 0.09217787 0.08568668 0.08590388
- 0.09312415 0.09303379 0.08970332 0.09149098]
+value: [0.09517384 0.09400702 0.09465933 0.09498215 0.09645772 0.0963223
+ 0.09489012 0.09161878 0.09524989 0.09460068]
 
-mean value: 0.091162109375
+mean value: 0.09479618072509766
 
 key: test_mcc 
-value: [0.6        0.65465367 1.         0.21821789 0.81649658 1.
- 0.81649658 0.81649658 0.6        0.5       ]
+value: [0.81649658 0.81649658 0.65465367 0.81649658 0.65465367 0.65465367
+ 0.81649658 0.81649658 0.6        0.81649658]
 
-mean value: 0.7022361303727148
+mean value: 0.7462940497690288
 
 key: train_mcc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23557,9 +23762,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_accuracy 
-value: [0.8 0.8 1.  0.6 0.9 1.  0.9 0.9 0.8 0.7]
+value: [0.9 0.9 0.8 0.9 0.8 0.8 0.9 0.9 0.8 0.9]
 
-mean value: 0.84
+mean value: 0.86
 
 key: train_accuracy 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23567,10 +23772,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_fscore 
-value: [0.8        0.83333333 1.         0.66666667 0.88888889 1.
- 0.88888889 0.90909091 0.8        0.57142857]
+value: [0.90909091 0.90909091 0.75       0.90909091 0.75       0.83333333
+ 0.88888889 0.88888889 0.8        0.88888889]
 
-mean value: 0.8358297258297258
+mean value: 0.8527272727272728
 
 key: train_fscore 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23578,10 +23783,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_precision 
-value: [0.8        0.71428571 1.         0.57142857 1.         1.
- 1.         0.83333333 0.8        1.        ]
+value: [0.83333333 0.83333333 1.         0.83333333 1.         0.71428571
+ 1.         1.         0.8        1.        ]
 
-mean value: 0.871904761904762
+mean value: 0.9014285714285715
 
 key: train_precision 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23589,9 +23794,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_recall 
-value: [0.8 1.  1.  0.8 0.8 1.  0.8 1.  0.8 0.4]
+value: [1.  1.  0.6 1.  0.6 1.  0.8 0.8 0.8 0.8]
 
-mean value: 0.8400000000000001
+mean value: 0.84
 
 key: train_recall 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23599,9 +23804,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_roc_auc 
-value: [0.8 0.8 1.  0.6 0.9 1.  0.9 0.9 0.8 0.7]
+value: [0.9 0.9 0.8 0.9 0.8 0.8 0.9 0.9 0.8 0.9]
 
-mean value: 0.8400000000000001
+mean value: 0.86
 
 key: train_roc_auc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23609,17 +23814,17 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_jcc 
-value: [0.66666667 0.71428571 1.         0.5        0.8        1.
- 0.8        0.83333333 0.66666667 0.4       ]
+value: [0.83333333 0.83333333 0.6        0.83333333 0.6        0.71428571
+ 0.8        0.8        0.66666667 0.8       ]
 
-mean value: 0.7380952380952381
+mean value: 0.7480952380952381
 
 key: train_jcc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 
 mean value: 1.0
 
-MCC on Blind test: 0.54
+MCC on Blind test: 0.55
 
 Accuracy on Blind test: 0.8
 
@@ -23659,103 +23864,103 @@ Running model pipeline: Pipeline(steps=[('prep',
                                         oob_score=True, random_state=42))])
 
 key: fit_time 
-value: [0.84170556 0.83023262 0.82111239 0.94291162 0.86192775 0.89486432
- 0.85397744 0.87075162 0.84488964 0.99061775]
+value: [0.84236526 0.86939478 0.84446883 0.89900541 0.81594372 0.85574555
+ 0.87060905 0.88659859 0.84814668 0.80270219]
 
-mean value: 0.875299072265625
+mean value: 0.8534980058670044
 
 key: score_time 
-value: [0.19168687 0.20659614 0.23333478 0.17990088 0.21830344 0.18155169
- 0.20949388 0.21993876 0.20162749 0.21077156]
+value: [0.18092632 0.22178912 0.15738559 0.22907305 0.1698842  0.20201349
+ 0.20945191 0.231884   0.2336936  0.21163177]
 
-mean value: 0.20532054901123048
+mean value: 0.20477330684661865
 
 key: test_mcc 
-value: [0.81649658 0.65465367 0.81649658 0.21821789 0.81649658 0.65465367
- 0.81649658 0.81649658 0.65465367 0.5       ]
+value: [0.81649658 1.         0.65465367 0.81649658 0.81649658 0.65465367
+ 0.40824829 0.81649658 0.6        0.81649658]
 
-mean value: 0.6764661806998554
+mean value: 0.7400038536518447
 
 key: train_mcc 
-value: [1.         0.97801929 0.97801929 1.         0.97801929 0.97801929
- 0.95650071 1.         0.95650071 1.        ]
+value: [0.97801929 0.97801929 0.95555556 0.97801929 0.97801929 0.95555556
+ 0.95555556 0.95555556 0.93356387 0.95555556]
 
-mean value: 0.9825078604565161
+mean value: 0.9623418824548596
 
 key: test_accuracy 
-value: [0.9 0.8 0.9 0.6 0.9 0.8 0.9 0.9 0.8 0.7]
+value: [0.9 1.  0.8 0.9 0.9 0.8 0.7 0.9 0.8 0.9]
 
-mean value: 0.8200000000000001
+mean value: 0.86
 
 key: train_accuracy 
-value: [1.         0.98888889 0.98888889 1.         0.98888889 0.98888889
- 0.97777778 1.         0.97777778 1.        ]
+value: [0.98888889 0.98888889 0.97777778 0.98888889 0.98888889 0.97777778
+ 0.97777778 0.97777778 0.96666667 0.97777778]
 
-mean value: 0.9911111111111112
+mean value: 0.9811111111111112
 
 key: test_fscore 
-value: [0.88888889 0.83333333 0.88888889 0.66666667 0.88888889 0.75
- 0.88888889 0.90909091 0.75       0.57142857]
+value: [0.90909091 1.         0.75       0.90909091 0.88888889 0.83333333
+ 0.72727273 0.88888889 0.8        0.88888889]
 
-mean value: 0.8036075036075037
+mean value: 0.8595454545454545
 
 key: train_fscore 
-value: [1.         0.98901099 0.98876404 1.         0.98876404 0.98876404
- 0.97727273 1.         0.97727273 1.        ]
+value: [0.98876404 0.98901099 0.97777778 0.98901099 0.98901099 0.97777778
+ 0.97777778 0.97777778 0.96629213 0.97777778]
 
-mean value: 0.9909848578387904
+mean value: 0.9810978035697137
 
 key: test_precision 
-value: [1.         0.71428571 1.         0.57142857 1.         1.
- 1.         0.83333333 1.         1.        ]
+value: [0.83333333 1.         1.         0.83333333 1.         0.71428571
+ 0.66666667 1.         0.8        1.        ]
 
-mean value: 0.9119047619047619
+mean value: 0.8847619047619047
 
 key: train_precision 
-value: [1.         0.97826087 1.         1.         1.         1.
- 1.         1.         1.         1.        ]
+value: [1.         0.97826087 0.97777778 0.97826087 0.97826087 0.97777778
+ 0.97777778 0.97777778 0.97727273 0.97777778]
 
-mean value: 0.9978260869565218
+mean value: 0.9800944224857269
 
 key: test_recall 
-value: [0.8 1.  0.8 0.8 0.8 0.6 0.8 1.  0.6 0.4]
+value: [1.  1.  0.6 1.  0.8 1.  0.8 0.8 0.8 0.8]
 
-mean value: 0.76
+mean value: 0.86
 
 key: train_recall 
 value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
   warn(
-[1.         1.         0.97777778 1.         0.97777778 0.97777778
- 0.95555556 1.         0.95555556 1.        ]
+[0.97777778 1.         0.97777778 1.         1.         0.97777778
+ 0.97777778 0.97777778 0.95555556 0.97777778]
 
-mean value: 0.9844444444444445
+mean value: 0.9822222222222222
 
 key: test_roc_auc 
-value: [0.9 0.8 0.9 0.6 0.9 0.8 0.9 0.9 0.8 0.7]
+value: [0.9 1.  0.8 0.9 0.9 0.8 0.7 0.9 0.8 0.9]
 
-mean value: 0.8200000000000001
+mean value: 0.86
 
 key: train_roc_auc 
-value: [1.         0.98888889 0.98888889 1.         0.98888889 0.98888889
- 0.97777778 1.         0.97777778 1.        ]
+value: [0.98888889 0.98888889 0.97777778 0.98888889 0.98888889 0.97777778
+ 0.97777778 0.97777778 0.96666667 0.97777778]
 
-mean value: 0.9911111111111112
+mean value: 0.981111111111111
 
 key: test_jcc 
-value: [0.8        0.71428571 0.8        0.5        0.8        0.6
- 0.8        0.83333333 0.6        0.4       ]
+value: [0.83333333 1.         0.6        0.83333333 0.8        0.71428571
+ 0.57142857 0.8        0.66666667 0.8       ]
 
-mean value: 0.6847619047619048
+mean value: 0.7619047619047619
 
 key: train_jcc 
-value: [1.         0.97826087 0.97777778 1.         0.97777778 0.97777778
- 0.95555556 1.         0.95555556 1.        ]
+value: [0.97777778 0.97826087 0.95652174 0.97826087 0.97826087 0.95652174
+ 0.95652174 0.95652174 0.93478261 0.95652174]
 
-mean value: 0.9822705314009662
+mean value: 0.9629951690821257
 
-MCC on Blind test: 0.6
+MCC on Blind test: 0.55
 
-Accuracy on Blind test: 0.82
+Accuracy on Blind test: 0.8
 
 Model_name: Naive Bayes 
 Model func: BernoulliNB() 
@@ -23788,101 +23993,101 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', BernoulliNB())])
 
 key: fit_time 
-value: [0.02315879 0.01028872 0.00969672 0.00906706 0.00864863 0.00874782
- 0.00876069 0.00871038 0.00945473 0.00973272]
+value: [0.02122283 0.0088222  0.00864434 0.00867176 0.00868058 0.00866842
+ 0.00870466 0.00863767 0.00891924 0.00864077]
 
-mean value: 0.010626626014709473
+mean value: 0.009961247444152832
 
 key: score_time 
-value: [0.01252508 0.00953984 0.00998116 0.00864005 0.00861979 0.00862026
- 0.0085988  0.00850296 0.00862026 0.00932288]
+value: [0.01298022 0.00939631 0.00880861 0.00860405 0.0086031  0.00863481
+ 0.00868678 0.00857902 0.00937891 0.00851655]
 
-mean value: 0.00929710865020752
+mean value: 0.009218835830688476
 
 key: test_mcc 
-value: [0.40824829 0.6        0.         0.21821789 0.40824829 0.65465367
- 0.6        0.65465367 0.5        0.        ]
+value: [0.81649658 0.40824829 0.         0.40824829 0.40824829 0.6
+ 0.40824829 0.2        0.40824829 0.        ]
 
-mean value: 0.4044021812579673
+mean value: 0.3657738033247041
 
 key: train_mcc 
-value: [0.68888889 0.58137767 0.60540551 0.66683134 0.64700558 0.55776344
- 0.69162666 0.58137767 0.71269665 0.70004007]
+value: [0.62609903 0.58137767 0.60540551 0.58137767 0.62360956 0.71269665
+ 0.60238451 0.58969198 0.6681531  0.68957028]
 
-mean value: 0.6433013479547345
+mean value: 0.6280365978811131
 
 key: test_accuracy 
-value: [0.7 0.8 0.5 0.6 0.7 0.8 0.8 0.8 0.7 0.5]
+value: [0.9 0.7 0.5 0.7 0.7 0.8 0.7 0.6 0.7 0.5]
 
-mean value: 0.69
+mean value: 0.6799999999999999
 
 key: train_accuracy 
-value: [0.84444444 0.78888889 0.8        0.83333333 0.82222222 0.77777778
- 0.84444444 0.78888889 0.85555556 0.84444444]
+value: [0.81111111 0.78888889 0.8        0.78888889 0.81111111 0.85555556
+ 0.8        0.78888889 0.83333333 0.84444444]
 
-mean value: 0.82
+mean value: 0.8122222222222222
 
 key: test_fscore 
-value: [0.72727273 0.8        0.44444444 0.66666667 0.66666667 0.75
- 0.8        0.83333333 0.57142857 0.28571429]
+value: [0.90909091 0.66666667 0.28571429 0.72727273 0.66666667 0.8
+ 0.72727273 0.6        0.66666667 0.44444444]
 
-mean value: 0.6545526695526696
+mean value: 0.6493795093795094
 
 key: train_fscore 
-value: [0.84444444 0.77647059 0.78571429 0.83146067 0.81395349 0.76744186
- 0.8372093  0.77647059 0.85057471 0.82926829]
+value: [0.8        0.77647059 0.78571429 0.77647059 0.8045977  0.85057471
+ 0.79069767 0.7654321  0.82758621 0.84090909]
 
-mean value: 0.8113008237276017
+mean value: 0.8018452946967656
 
 key: test_precision 
-value: [0.66666667 0.8        0.5        0.57142857 0.75       1.
- 0.8        0.71428571 1.         0.5       ]
+value: [0.83333333 0.75       0.5        0.66666667 0.75       0.8
+ 0.66666667 0.6        0.75       0.5       ]
 
-mean value: 0.7302380952380952
+mean value: 0.6816666666666666
 
 key: train_precision 
-value: [0.84444444 0.825      0.84615385 0.84090909 0.85365854 0.80487805
- 0.87804878 0.825      0.88095238 0.91891892]
+value: [0.85       0.825      0.84615385 0.825      0.83333333 0.88095238
+ 0.82926829 0.86111111 0.85714286 0.86046512]
 
-mean value: 0.851796404723234
+mean value: 0.8468426937655525
 
 key: test_recall 
-value: [0.8 0.8 0.4 0.8 0.6 0.6 0.8 1.  0.4 0.2]
+value: [1.  0.6 0.2 0.8 0.6 0.8 0.8 0.6 0.6 0.4]
 
 mean value: 0.64
 
 key: train_recall 
-value: [0.84444444 0.73333333 0.73333333 0.82222222 0.77777778 0.73333333
- 0.8        0.73333333 0.82222222 0.75555556]
+value: [0.75555556 0.73333333 0.73333333 0.73333333 0.77777778 0.82222222
+ 0.75555556 0.68888889 0.8        0.82222222]
 
-mean value: 0.7755555555555556
+mean value: 0.7622222222222222
 
 key: test_roc_auc 
-value: [0.7 0.8 0.5 0.6 0.7 0.8 0.8 0.8 0.7 0.5]
+value: [0.9 0.7 0.5 0.7 0.7 0.8 0.7 0.6 0.7 0.5]
 
-mean value: 0.6900000000000001
+mean value: 0.68
 
 key: train_roc_auc 
-value: [0.84444444 0.78888889 0.8        0.83333333 0.82222222 0.77777778
- 0.84444444 0.78888889 0.85555556 0.84444444]
+value: [0.81111111 0.78888889 0.8        0.78888889 0.81111111 0.85555556
+ 0.8        0.78888889 0.83333333 0.84444444]
 
-mean value: 0.82
+mean value: 0.8122222222222222
 
 key: test_jcc 
-value: [0.57142857 0.66666667 0.28571429 0.5        0.5        0.6
- 0.66666667 0.71428571 0.4        0.16666667]
+value: [0.83333333 0.5        0.16666667 0.57142857 0.5        0.66666667
+ 0.57142857 0.42857143 0.5        0.28571429]
 
-mean value: 0.5071428571428571
+mean value: 0.5023809523809524
 
 key: train_jcc 
-value: [0.73076923 0.63461538 0.64705882 0.71153846 0.68627451 0.62264151
- 0.72       0.63461538 0.74       0.70833333]
+value: [0.66666667 0.63461538 0.64705882 0.63461538 0.67307692 0.74
+ 0.65384615 0.62       0.70588235 0.7254902 ]
 
-mean value: 0.683584663763909
+mean value: 0.6701251885369532
 
-MCC on Blind test: 0.12
+MCC on Blind test: 0.18
 
-Accuracy on Blind test: 0.6
+Accuracy on Blind test: 0.65
 
 Model_name: XGBoost 
 Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
@@ -23928,22 +24133,22 @@ Running model pipeline: Pipeline(steps=[('prep',
                                validate_parameters=None, verbosity=0))])
 
 key: fit_time 
-value: [0.0782187  0.035676   0.03869748 0.19958353 0.03675485 0.04161453
- 0.23583102 0.50171185 0.18434334 0.07802296]
+value: [0.08190846 0.06687713 0.03293014 0.08744478 0.03286195 0.03449225
+ 0.03584909 0.0576427  0.03408647 0.03423762]
 
-mean value: 0.14304542541503906
+mean value: 0.049833059310913086
 
 key: score_time 
-value: [0.01095033 0.01015282 0.01051188 0.01092458 0.01066589 0.01025844
- 0.01311946 0.01248908 0.01308417 0.01076651]
+value: [0.01158047 0.01002455 0.01006317 0.01050115 0.01028323 0.010041
+ 0.01015139 0.01003408 0.00998139 0.01002693]
 
-mean value: 0.011292314529418946
+mean value: 0.010268735885620116
 
 key: test_mcc 
-value: [0.65465367 1.         0.81649658 0.81649658 0.81649658 1.
- 0.81649658 0.81649658 1.         1.        ]
+value: [0.81649658 0.81649658 0.81649658 0.81649658 1.         1.
+ 1.         0.81649658 0.81649658 1.        ]
 
-mean value: 0.8737136575346607
+mean value: 0.8898979485566356
 
 key: train_mcc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23951,9 +24156,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_accuracy 
-value: [0.8 1.  0.9 0.9 0.9 1.  0.9 0.9 1.  1. ]
+value: [0.9 0.9 0.9 0.9 1.  1.  1.  0.9 0.9 1. ]
 
-mean value: 0.93
+mean value: 0.9400000000000001
 
 key: train_accuracy 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23961,10 +24166,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_fscore 
-value: [0.83333333 1.         0.90909091 0.90909091 0.88888889 1.
- 0.88888889 0.90909091 1.         1.        ]
+value: [0.90909091 0.90909091 0.88888889 0.90909091 1.         1.
+ 1.         0.90909091 0.90909091 1.        ]
 
-mean value: 0.9338383838383838
+mean value: 0.9434343434343434
 
 key: train_fscore 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23972,10 +24177,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_precision 
-value: [0.71428571 1.         0.83333333 0.83333333 1.         1.
- 1.         0.83333333 1.         1.        ]
+value: [0.83333333 0.83333333 1.         0.83333333 1.         1.
+ 1.         0.83333333 0.83333333 1.        ]
 
-mean value: 0.9214285714285715
+mean value: 0.9166666666666667
 
 key: train_precision 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23983,9 +24188,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_recall 
-value: [1.  1.  1.  1.  0.8 1.  0.8 1.  1.  1. ]
+value: [1.  1.  0.8 1.  1.  1.  1.  1.  1.  1. ]
 
-mean value: 0.96
+mean value: 0.98
 
 key: train_recall 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -23993,9 +24198,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_roc_auc 
-value: [0.8 1.  0.9 0.9 0.9 1.  0.9 0.9 1.  1. ]
+value: [0.9 0.9 0.9 0.9 1.  1.  1.  0.9 0.9 1. ]
 
-mean value: 0.93
+mean value: 0.9400000000000001
 
 key: train_roc_auc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -24003,19 +24208,19 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_jcc 
-value: [0.71428571 1.         0.83333333 0.83333333 0.8        1.
- 0.8        0.83333333 1.         1.        ]
+value: [0.83333333 0.83333333 0.8        0.83333333 1.         1.
+ 1.         0.83333333 0.83333333 1.        ]
 
-mean value: 0.8814285714285715
+mean value: 0.8966666666666667
 
 key: train_jcc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 
 mean value: 1.0
 
-MCC on Blind test: 0.89
+MCC on Blind test: 0.84
 
-Accuracy on Blind test: 0.95
+Accuracy on Blind test: 0.92
 
 Model_name: LDA 
 Model func: LinearDiscriminantAnalysis() 
@@ -24048,101 +24253,100 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', LinearDiscriminantAnalysis())])
 
 key: fit_time 
-value: [0.02349949 0.04713821 0.06608343 0.06277347 0.04959702 0.0388329
- 0.03968954 0.053617   0.0484674  0.04304004]
+value: [0.0152812  0.02139306 0.04124451 0.04086208 0.04085636 0.04082632
+ 0.04066205 0.04596925 0.04081917 0.04116321]
 
-mean value: 0.047273850440979
+mean value: 0.0369077205657959
 
 key: score_time 
-value: [0.02380705 0.02124381 0.0237565  0.03547454 0.02019763 0.02404904
- 0.02267098 0.02003694 0.0200119  0.02282238]
+value: [0.01144218 0.01933622 0.0208571  0.02243543 0.01998782 0.02180576
+ 0.01957369 0.02016187 0.02205682 0.01150918]
 
-mean value: 0.02340707778930664
+mean value: 0.018916606903076172
 
 key: test_mcc 
-value: [0.33333333 0.81649658 0.5        0.         0.21821789 0.81649658
- 0.81649658 0.33333333 0.81649658 0.21821789]
+value: [0.81649658 0.65465367 0.40824829 0.65465367 0.2        0.21821789
+ 1.         0.40824829 0.40824829 0.40824829]
 
-mean value: 0.48690887708495556
+mean value: 0.5177014974435125
 
 key: train_mcc 
 value: [1.         1.         1.         1.         0.97801929 1.
- 1.         1.         0.97801929 0.97801929]
+ 1.         1.         1.         1.        ]
 
-mean value: 0.9934057881530954
+mean value: 0.9978019293843652
 
 key: test_accuracy 
-value: [0.6 0.9 0.7 0.5 0.6 0.9 0.9 0.6 0.9 0.6]
+value: [0.9 0.8 0.7 0.8 0.6 0.6 1.  0.7 0.7 0.7]
 
-mean value: 0.72
+mean value: 0.75
 
 key: train_accuracy 
 value: [1.         1.         1.         1.         0.98888889 1.
- 1.         1.         0.98888889 0.98888889]
-
-mean value: 0.9966666666666667
-
-key: test_fscore 
-value: [0.71428571 0.90909091 0.76923077 0.61538462 0.66666667 0.90909091
- 0.90909091 0.71428571 0.88888889 0.5       ]
-
-mean value: 0.7596015096015096
-
-key: train_fscore 
-value: [1.         1.         1.         1.         0.98901099 1.
- 1.         1.         0.98876404 0.98876404]
-
-mean value: 0.996653907889863
-
-key: test_precision 
-value: [0.55555556 0.83333333 0.625      0.5        0.57142857 0.83333333
- 0.83333333 0.55555556 1.         0.66666667]
-
-mean value: 0.6974206349206349
-
-key: train_precision 
-value: [1.         1.         1.         1.         0.97826087 1.
  1.         1.         1.         1.        ]
 
-mean value: 0.9978260869565218
+mean value: 0.9988888888888889
+
+key: test_fscore 
+value: [0.90909091 0.83333333 0.66666667 0.83333333 0.6        0.66666667
+ 1.         0.72727273 0.72727273 0.72727273]
+
+mean value: 0.769090909090909
+
+key: train_fscore 
+value: [1.         1.         1.         1.         0.98876404 1.
+ 1.         1.         1.         1.        ]
+
+mean value: 0.998876404494382
+
+key: test_precision 
+value: [0.83333333 0.71428571 0.75       0.71428571 0.6        0.57142857
+ 1.         0.66666667 0.66666667 0.66666667]
+
+mean value: 0.7183333333333333
+
+key: train_precision 
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+
+mean value: 1.0
 
 key: test_recall 
-value: [1.  1.  1.  0.8 0.8 1.  1.  1.  0.8 0.4]
+value: [1.  1.  0.6 1.  0.6 0.8 1.  0.8 0.8 0.8]
 
-mean value: 0.88
+mean value: 0.84
 
 key: train_recall 
-value: [1.         1.         1.         1.         1.         1.
- 1.         1.         0.97777778 0.97777778]
+value: [1.         1.         1.         1.         0.97777778 1.
+ 1.         1.         1.         1.        ]
 
-mean value: 0.9955555555555555
+mean value: 0.9977777777777778
 
 key: test_roc_auc 
-value: [0.6 0.9 0.7 0.5 0.6 0.9 0.9 0.6 0.9 0.6]
+value: [0.9 0.8 0.7 0.8 0.6 0.6 1.  0.7 0.7 0.7]
 
-mean value: 0.72
+mean value: 0.75
 
 key: train_roc_auc 
 value: [1.         1.         1.         1.         0.98888889 1.
- 1.         1.         0.98888889 0.98888889]
+ 1.         1.         1.         1.        ]
 
-mean value: 0.9966666666666667
+mean value: 0.9988888888888889
 
 key: test_jcc 
-value: [0.55555556 0.83333333 0.625      0.44444444 0.5        0.83333333
- 0.83333333 0.55555556 0.8        0.33333333]
+value: [0.83333333 0.71428571 0.5        0.71428571 0.42857143 0.5
+ 1.         0.57142857 0.57142857 0.57142857]
 
-mean value: 0.6313888888888889
+mean value: 0.6404761904761904
 
 key: train_jcc 
-value: [1.         1.         1.         1.         0.97826087 1.
- 1.         1.         0.97777778 0.97777778]
+value: [1.         1.         1.         1.         0.97777778 1.
+ 1.         1.         1.         1.        ]
 
-mean value: 0.9933816425120773
+mean value: 0.9977777777777778
 
-MCC on Blind test: 0.01
+MCC on Blind test: -0.1
 
-Accuracy on Blind test: 0.52
+Accuracy on Blind test: 0.48
 
 Model_name: Multinomial 
 Model func: MultinomialNB() 
@@ -24175,101 +24379,101 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', MultinomialNB())])
 
 key: fit_time 
-value: [0.01617742 0.00876117 0.00850463 0.0087924  0.00857806 0.00921059
- 0.00938845 0.0089817  0.00929332 0.00935221]
+value: [0.01152873 0.00863695 0.00857687 0.00819945 0.00833392 0.00825739
+ 0.0083611  0.00821066 0.00823236 0.00819683]
 
-mean value: 0.009703993797302246
+mean value: 0.00865342617034912
 
 key: score_time 
-value: [0.00881886 0.0085144  0.00851989 0.00928068 0.00837755 0.00896335
- 0.00911069 0.00911427 0.00912476 0.00914335]
+value: [0.01131368 0.00865769 0.0086081  0.00834394 0.00836015 0.00839067
+ 0.00829577 0.00829291 0.00828815 0.00827575]
 
-mean value: 0.008896780014038087
+mean value: 0.008682680130004884
 
 key: test_mcc 
-value: [0.40824829 0.40824829 0.21821789 0.40824829 0.40824829 0.65465367
- 0.         0.6        0.2        0.21821789]
+value: [0.6        0.65465367 0.         0.40824829 0.40824829 0.40824829
+ 0.40824829 0.2        0.40824829 0.5       ]
 
-mean value: 0.3524082613035414
+mean value: 0.3995895123027292
 
 key: train_mcc 
-value: [0.62237591 0.62237591 0.58137767 0.60238451 0.56056066 0.60540551
- 0.56056066 0.55610507 0.56056066 0.55610507]
+value: [0.55610507 0.49193496 0.60059347 0.51314236 0.55610507 0.57906602
+ 0.57906602 0.53452248 0.60238451 0.60238451]
 
-mean value: 0.5827811645657781
+mean value: 0.5615304473142139
 
 key: test_accuracy 
-value: [0.7 0.7 0.6 0.7 0.7 0.8 0.5 0.8 0.6 0.6]
-
-mean value: 0.6699999999999999
-
-key: train_accuracy 
-value: [0.81111111 0.81111111 0.78888889 0.8        0.77777778 0.8
- 0.77777778 0.77777778 0.77777778 0.77777778]
-
-mean value: 0.79
-
-key: test_fscore 
-value: [0.72727273 0.72727273 0.5        0.72727273 0.72727273 0.75
- 0.54545455 0.8        0.6        0.5       ]
-
-mean value: 0.6604545454545454
-
-key: train_fscore 
-value: [0.80898876 0.81318681 0.77647059 0.80851064 0.76190476 0.78571429
- 0.76190476 0.77272727 0.76190476 0.77272727]
-
-mean value: 0.782403992064804
-
-key: test_precision 
-value: [0.66666667 0.66666667 0.66666667 0.66666667 0.66666667 1.
- 0.5        0.8        0.6        0.66666667]
+value: [0.8 0.8 0.5 0.7 0.7 0.7 0.7 0.6 0.7 0.7]
 
 mean value: 0.69
 
-key: train_precision 
-value: [0.81818182 0.80434783 0.825      0.7755102  0.82051282 0.84615385
- 0.82051282 0.79069767 0.82051282 0.79069767]
+key: train_accuracy 
+value: [0.77777778 0.74444444 0.8        0.75555556 0.77777778 0.78888889
+ 0.78888889 0.76666667 0.8        0.8       ]
 
-mean value: 0.8112127504879925
+mean value: 0.78
+
+key: test_fscore 
+value: [0.8        0.75       0.44444444 0.72727273 0.72727273 0.72727273
+ 0.72727273 0.6        0.66666667 0.57142857]
+
+mean value: 0.6741630591630591
+
+key: train_fscore 
+value: [0.77272727 0.72941176 0.79545455 0.74418605 0.77272727 0.7816092
+ 0.7816092  0.75862069 0.79069767 0.79069767]
+
+mean value: 0.7717741331423581
+
+key: test_precision 
+value: [0.8        1.         0.5        0.66666667 0.66666667 0.66666667
+ 0.66666667 0.6        0.75       1.        ]
+
+mean value: 0.7316666666666667
+
+key: train_precision 
+value: [0.79069767 0.775      0.81395349 0.7804878  0.79069767 0.80952381
+ 0.80952381 0.78571429 0.82926829 0.82926829]
+
+mean value: 0.801413513221511
 
 key: test_recall 
-value: [0.8 0.8 0.4 0.8 0.8 0.6 0.6 0.8 0.6 0.4]
+value: [0.8 0.6 0.4 0.8 0.8 0.8 0.8 0.6 0.6 0.4]
 
 mean value: 0.66
 
 key: train_recall 
-value: [0.8        0.82222222 0.73333333 0.84444444 0.71111111 0.73333333
- 0.71111111 0.75555556 0.71111111 0.75555556]
+value: [0.75555556 0.68888889 0.77777778 0.71111111 0.75555556 0.75555556
+ 0.75555556 0.73333333 0.75555556 0.75555556]
 
-mean value: 0.7577777777777778
+mean value: 0.7444444444444445
 
 key: test_roc_auc 
-value: [0.7 0.7 0.6 0.7 0.7 0.8 0.5 0.8 0.6 0.6]
+value: [0.8 0.8 0.5 0.7 0.7 0.7 0.7 0.6 0.7 0.7]
 
-mean value: 0.67
+mean value: 0.6900000000000001
 
 key: train_roc_auc 
-value: [0.81111111 0.81111111 0.78888889 0.8        0.77777778 0.8
- 0.77777778 0.77777778 0.77777778 0.77777778]
+value: [0.77777778 0.74444444 0.8        0.75555556 0.77777778 0.78888889
+ 0.78888889 0.76666667 0.8        0.8       ]
 
-mean value: 0.79
+mean value: 0.78
 
 key: test_jcc 
-value: [0.57142857 0.57142857 0.33333333 0.57142857 0.57142857 0.6
- 0.375      0.66666667 0.42857143 0.33333333]
+value: [0.66666667 0.6        0.28571429 0.57142857 0.57142857 0.57142857
+ 0.57142857 0.42857143 0.5        0.4       ]
 
-mean value: 0.5022619047619047
+mean value: 0.5166666666666666
 
 key: train_jcc 
-value: [0.67924528 0.68518519 0.63461538 0.67857143 0.61538462 0.64705882
- 0.61538462 0.62962963 0.61538462 0.62962963]
+value: [0.62962963 0.57407407 0.66037736 0.59259259 0.62962963 0.64150943
+ 0.64150943 0.61111111 0.65384615 0.65384615]
 
-mean value: 0.6430089210333384
+mean value: 0.628812557114444
 
-MCC on Blind test: 0.3
+MCC on Blind test: 0.05
 
-Accuracy on Blind test: 0.68
+Accuracy on Blind test: 0.57
 
 Model_name: Passive Aggresive 
 Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42) 
@@ -24303,105 +24507,107 @@ Running model pipeline: Pipeline(steps=[('prep',
                  PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
 
 key: fit_time 
-value: [0.01009655 0.01374936 0.0128026  0.01307821 0.01348066 0.01305318
- 0.01311874 0.01353908 0.01283336 0.01359773]
+value: [0.01000237 0.01344228 0.01320529 0.01263332 0.01426673 0.01389861
+ 0.01293731 0.01380181 0.01295614 0.01367664]
 
-mean value: 0.01293494701385498
+mean value: 0.013082051277160644
 
 key: score_time 
-value: [0.0084734  0.01106596 0.01120782 0.0111711  0.0113287  0.01123643
- 0.01121616 0.01123714 0.01122808 0.01128221]
+value: [0.00835109 0.01133704 0.01144195 0.01147938 0.01144743 0.01136518
+ 0.011235   0.0112555  0.01120543 0.01124072]
 
-mean value: 0.010944700241088868
+mean value: 0.011035871505737305
 
 key: test_mcc 
-value: [0.65465367 0.81649658 0.81649658 0.21821789 0.81649658 0.5
- 0.65465367 0.65465367 0.81649658 0.40824829]
+value: [0.81649658 0.81649658 0.40824829 0.40824829 0.81649658 0.5
+ 0.81649658 0.65465367 0.81649658 1.        ]
 
-mean value: 0.6356413516534691
+mean value: 0.7053633156274334
 
 key: train_mcc 
-value: [0.93541435 0.93541435 0.91473203 0.97801929 0.97801929 0.89442719
- 0.91201231 0.93541435 0.97801929 0.95650071]
+value: [1.         0.95650071 0.89442719 0.91201231 0.91473203 0.95650071
+ 0.93356387 0.93541435 0.95555556 0.97801929]
 
-mean value: 0.9417973170390846
+mean value: 0.9436726030863619
 
 key: test_accuracy 
-value: [0.8 0.9 0.9 0.6 0.9 0.7 0.8 0.8 0.9 0.7]
-
-mean value: 0.8
-
-key: train_accuracy 
-value: [0.96666667 0.96666667 0.95555556 0.98888889 0.98888889 0.94444444
- 0.95555556 0.96666667 0.98888889 0.97777778]
-
-mean value: 0.97
-
-key: test_fscore 
-value: [0.83333333 0.90909091 0.90909091 0.66666667 0.88888889 0.57142857
- 0.83333333 0.83333333 0.88888889 0.66666667]
-
-mean value: 0.80007215007215
-
-key: train_fscore 
-value: [0.96774194 0.96774194 0.95744681 0.98901099 0.98901099 0.94117647
- 0.95652174 0.96774194 0.98901099 0.97727273]
-
-mean value: 0.9702676518986616
-
-key: test_precision 
-value: [0.71428571 0.83333333 0.83333333 0.57142857 1.         1.
- 0.71428571 0.71428571 1.         0.75      ]
-
-mean value: 0.8130952380952381
-
-key: train_precision 
-value: [0.9375     0.9375     0.91836735 0.97826087 0.97826087 1.
- 0.93617021 0.9375     0.97826087 1.        ]
-
-mean value: 0.9601820168400386
-
-key: test_recall 
-value: [1.  1.  1.  0.8 0.8 0.4 1.  1.  0.8 0.6]
+value: [0.9 0.9 0.7 0.7 0.9 0.7 0.9 0.8 0.9 1. ]
 
 mean value: 0.84
 
-key: train_recall 
-value: [1.         1.         1.         1.         1.         0.88888889
- 0.97777778 1.         1.         0.95555556]
+key: train_accuracy 
+value: [1.         0.97777778 0.94444444 0.95555556 0.95555556 0.97777778
+ 0.96666667 0.96666667 0.97777778 0.98888889]
 
-mean value: 0.9822222222222222
+mean value: 0.9711111111111111
+
+key: test_fscore 
+value: [0.90909091 0.90909091 0.66666667 0.72727273 0.90909091 0.76923077
+ 0.88888889 0.83333333 0.88888889 1.        ]
+
+mean value: 0.8501554001554001
+
+key: train_fscore 
+value: [1.         0.97727273 0.94736842 0.95652174 0.95744681 0.97826087
+ 0.96703297 0.96774194 0.97777778 0.98901099]
+
+mean value: 0.9718434234837254
+
+key: test_precision 
+value: [0.83333333 0.83333333 0.75       0.66666667 0.83333333 0.625
+ 1.         0.71428571 1.         1.        ]
+
+mean value: 0.825595238095238
+
+key: train_precision 
+value: [1.         1.         0.9        0.93617021 0.91836735 0.95744681
+ 0.95652174 0.9375     0.97777778 0.97826087]
+
+mean value: 0.9562044754688801
+
+key: test_recall 
+value: [1.  1.  0.6 0.8 1.  1.  0.8 1.  0.8 1. ]
+
+mean value: 0.9
+
+key: train_recall 
+value: [1.         0.95555556 1.         0.97777778 1.         1.
+ 0.97777778 1.         0.97777778 1.        ]
+
+mean value: 0.9888888888888889
 
 key: test_roc_auc 
-value: [0.8 0.9 0.9 0.6 0.9 0.7 0.8 0.8 0.9 0.7]
+value: [0.9 0.9 0.7 0.7 0.9 0.7 0.9 0.8 0.9 1. ]
 
-mean value: 0.8
+mean value: 0.8400000000000001
 
 key: train_roc_auc 
-value: [0.96666667 0.96666667 0.95555556 0.98888889 0.98888889 0.94444444
- 0.95555556 0.96666667 0.98888889 0.97777778]
+value: [1.         0.97777778 0.94444444 0.95555556 0.95555556 0.97777778
+ 0.96666667 0.96666667 0.97777778 0.98888889]
 
-mean value: 0.97
+mean value: 0.9711111111111111
 
 key: test_jcc 
-value: [0.71428571 0.83333333 0.83333333 0.5        0.8        0.4
- 0.71428571 0.71428571 0.8        0.5       ]
+value: [0.83333333 0.83333333 0.5        0.57142857 0.83333333 0.625
+ 0.8        0.71428571 0.8        1.        ]
 
-mean value: 0.680952380952381
+mean value: 0.7510714285714286
 
 key: train_jcc 
-value: [0.9375     0.9375     0.91836735 0.97826087 0.97826087 0.88888889
- 0.91666667 0.9375     0.97826087 0.95555556]
+value: [1.         0.95555556 0.9        0.91666667 0.91836735 0.95744681
+ 0.93617021 0.9375     0.95652174 0.97826087]
 
-mean value: 0.9426761066745539
+mean value: 0.9456489199133246
 
-MCC on Blind test: 0.15
+MCC on Blind test: 0.36
 
-Accuracy on Blind test: 0.6
+Accuracy on Blind test: 0.72
 
 Model_name: Stochastic GDescent 
 Model func: SGDClassifier(n_jobs=10, random_state=42) 
-List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
+List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
+  _warn_prf(average, modifier, msg_start, len(result))
+[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
                        n_estimators=1000, n_jobs=10, oob_score=True,
                        random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
               colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
@@ -24430,101 +24636,101 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', SGDClassifier(n_jobs=10, random_state=42))])
 
 key: fit_time 
-value: [0.01266241 0.01253319 0.01259041 0.0122745  0.01273322 0.01224399
- 0.01242423 0.01244211 0.01222348 0.01222324]
+value: [0.01229525 0.01237893 0.01234698 0.01202965 0.0121994  0.01249433
+ 0.01228952 0.01218224 0.01196837 0.01224113]
 
-mean value: 0.012435078620910645
+mean value: 0.012242579460144043
 
 key: score_time 
-value: [0.01101017 0.01124358 0.01118398 0.01122379 0.01121879 0.01122594
- 0.01122475 0.01121068 0.01118016 0.01124597]
+value: [0.01024342 0.01122355 0.0113194  0.01121879 0.01136684 0.01144981
+ 0.01120973 0.01118469 0.0115819  0.01125216]
 
-mean value: 0.011196780204772949
+mean value: 0.011205029487609864
 
 key: test_mcc 
-value: [0.65465367 0.81649658 0.81649658 0.21821789 0.40824829 0.65465367
- 0.81649658 0.65465367 0.81649658 0.81649658]
+value: [0.81649658 0.81649658 0.21821789 0.6        0.33333333 0.40824829
+ 0.81649658 0.81649658 0.         0.81649658]
 
-mean value: 0.6672910097462417
+mean value: 0.5642282418671819
 
 key: train_mcc 
-value: [0.93356387 0.97801929 0.95650071 1.         1.         0.88910845
- 0.93356387 0.97801929 0.97801929 0.97801929]
+value: [1.         0.95650071 0.97801929 0.8675239  0.74278135 0.97801929
+ 0.88910845 0.89442719 0.48257301 0.91111111]
 
-mean value: 0.9624814081711083
+mean value: 0.8700064322418951
 
 key: test_accuracy 
-value: [0.8 0.9 0.9 0.6 0.7 0.8 0.9 0.8 0.9 0.9]
+value: [0.9 0.9 0.6 0.8 0.6 0.7 0.9 0.9 0.5 0.9]
 
-mean value: 0.8200000000000001
+mean value: 0.77
 
 key: train_accuracy 
-value: [0.96666667 0.98888889 0.97777778 1.         1.         0.94444444
- 0.96666667 0.98888889 0.98888889 0.98888889]
+value: [1.         0.97777778 0.98888889 0.93333333 0.85555556 0.98888889
+ 0.94444444 0.94444444 0.68888889 0.95555556]
 
-mean value: 0.9811111111111112
+mean value: 0.9277777777777778
 
 key: test_fscore 
-value: [0.83333333 0.90909091 0.90909091 0.66666667 0.72727273 0.75
- 0.90909091 0.83333333 0.88888889 0.90909091]
+value: [0.90909091 0.90909091 0.5        0.8        0.33333333 0.72727273
+ 0.88888889 0.90909091 0.         0.90909091]
 
-mean value: 0.8335858585858585
+mean value: 0.6885858585858586
 
 key: train_fscore 
-value: [0.96629213 0.98876404 0.97727273 1.         1.         0.94382022
- 0.96703297 0.98876404 0.98901099 0.98901099]
+value: [1.         0.97826087 0.98876404 0.93181818 0.83116883 0.98876404
+ 0.94505495 0.94736842 0.5483871  0.95555556]
 
-mean value: 0.9809968121765875
+mean value: 0.9115141990877197
 
 key: test_precision 
-value: [0.71428571 0.83333333 0.83333333 0.57142857 0.66666667 1.
- 0.83333333 0.71428571 1.         0.83333333]
+value: [0.83333333 0.83333333 0.66666667 0.8        1.         0.66666667
+ 1.         0.83333333 0.         0.83333333]
 
-mean value: 0.8
+mean value: 0.7466666666666667
 
 key: train_precision 
-value: [0.97727273 1.         1.         1.         1.         0.95454545
- 0.95652174 1.         0.97826087 0.97826087]
+value: [1.         0.95744681 1.         0.95348837 1.         1.
+ 0.93478261 0.9        1.         0.95555556]
 
-mean value: 0.9844861660079052
+mean value: 0.9701273344854869
 
 key: test_recall 
-value: [1.  1.  1.  0.8 0.8 0.6 1.  1.  0.8 1. ]
+value: [1.  1.  0.4 0.8 0.2 0.8 0.8 1.  0.  1. ]
 
-mean value: 0.9
+mean value: 0.7000000000000001
 
 key: train_recall 
-value: [0.95555556 0.97777778 0.95555556 1.         1.         0.93333333
- 0.97777778 0.97777778 1.         1.        ]
+value: [1.         1.         0.97777778 0.91111111 0.71111111 0.97777778
+ 0.95555556 1.         0.37777778 0.95555556]
 
-mean value: 0.9777777777777777
+mean value: 0.8866666666666667
 
 key: test_roc_auc 
-value: [0.8 0.9 0.9 0.6 0.7 0.8 0.9 0.8 0.9 0.9]
+value: [0.9 0.9 0.6 0.8 0.6 0.7 0.9 0.9 0.5 0.9]
 
-mean value: 0.8200000000000001
+mean value: 0.77
 
 key: train_roc_auc 
-value: [0.96666667 0.98888889 0.97777778 1.         1.         0.94444444
- 0.96666667 0.98888889 0.98888889 0.98888889]
+value: [1.         0.97777778 0.98888889 0.93333333 0.85555556 0.98888889
+ 0.94444444 0.94444444 0.68888889 0.95555556]
 
-mean value: 0.9811111111111112
+mean value: 0.9277777777777778
 
 key: test_jcc 
-value: [0.71428571 0.83333333 0.83333333 0.5        0.57142857 0.6
- 0.83333333 0.71428571 0.8        0.83333333]
+value: [0.83333333 0.83333333 0.33333333 0.66666667 0.2        0.57142857
+ 0.8        0.83333333 0.         0.83333333]
 
-mean value: 0.7233333333333334
+mean value: 0.5904761904761905
 
 key: train_jcc 
-value: [0.93478261 0.97777778 0.95555556 1.         1.         0.89361702
- 0.93617021 0.97777778 0.97826087 0.97826087]
+value: [1.         0.95744681 0.97777778 0.87234043 0.71111111 0.97777778
+ 0.89583333 0.9        0.37777778 0.91489362]
 
-mean value: 0.9632202692979751
+mean value: 0.8584958628841608
 
-MCC on Blind test: 0.08
+MCC on Blind test: 0.32
 
-Accuracy on Blind test: 0.57
+Accuracy on Blind test: 0.7
 
 Model_name: AdaBoost Classifier 
 Model func: AdaBoostClassifier(random_state=42) 
@@ -24557,22 +24763,22 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', AdaBoostClassifier(random_state=42))])
 
 key: fit_time 
-value: [0.09050202 0.08575082 0.08697701 0.08462596 0.08410382 0.0856297
- 0.08490443 0.08724713 0.08709764 0.08831048]
+value: [0.09080362 0.08026624 0.0802474  0.07980943 0.07940364 0.08048773
+ 0.07949638 0.07976413 0.07984233 0.07969499]
 
-mean value: 0.08651490211486816
+mean value: 0.08098158836364747
 
 key: score_time 
-value: [0.01545334 0.01579857 0.01585507 0.01584077 0.01450062 0.01623821
- 0.01575541 0.01573849 0.01574063 0.01583362]
+value: [0.01442552 0.01436472 0.01435137 0.01420498 0.01422119 0.01444697
+ 0.01426816 0.01428533 0.01425791 0.01430011]
 
-mean value: 0.0156754732131958
+mean value: 0.01431262493133545
 
 key: test_mcc 
-value: [0.81649658 0.81649658 0.81649658 0.65465367 0.81649658 1.
- 0.81649658 0.81649658 0.81649658 0.81649658]
+value: [0.81649658 0.65465367 0.81649658 0.81649658 0.81649658 0.81649658
+ 1.         0.81649658 0.65465367 1.        ]
 
-mean value: 0.8186626318129786
+mean value: 0.8208286826982311
 
 key: train_mcc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -24580,7 +24786,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_accuracy 
-value: [0.9 0.9 0.9 0.8 0.9 1.  0.9 0.9 0.9 0.9]
+value: [0.9 0.8 0.9 0.9 0.9 0.9 1.  0.9 0.8 1. ]
 
 mean value: 0.9
 
@@ -24590,10 +24796,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_fscore 
-value: [0.90909091 0.90909091 0.90909091 0.83333333 0.90909091 1.
- 0.88888889 0.90909091 0.88888889 0.90909091]
+value: [0.90909091 0.83333333 0.88888889 0.90909091 0.88888889 0.90909091
+ 1.         0.90909091 0.83333333 1.        ]
 
-mean value: 0.9065656565656566
+mean value: 0.908080808080808
 
 key: train_fscore 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -24601,10 +24807,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_precision 
-value: [0.83333333 0.83333333 0.83333333 0.71428571 0.83333333 1.
- 1.         0.83333333 1.         0.83333333]
+value: [0.83333333 0.71428571 1.         0.83333333 1.         0.83333333
+ 1.         0.83333333 0.71428571 1.        ]
 
-mean value: 0.8714285714285714
+mean value: 0.8761904761904762
 
 key: train_precision 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -24612,7 +24818,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_recall 
-value: [1.  1.  1.  1.  1.  1.  0.8 1.  0.8 1. ]
+value: [1.  1.  0.8 1.  0.8 1.  1.  1.  1.  1. ]
 
 mean value: 0.96
 
@@ -24622,7 +24828,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_roc_auc 
-value: [0.9 0.9 0.9 0.8 0.9 1.  0.9 0.9 0.9 0.9]
+value: [0.9 0.8 0.9 0.9 0.9 0.9 1.  0.9 0.8 1. ]
 
 mean value: 0.9
 
@@ -24632,10 +24838,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_jcc 
-value: [0.83333333 0.83333333 0.83333333 0.71428571 0.83333333 1.
- 0.8        0.83333333 0.8        0.83333333]
+value: [0.83333333 0.71428571 0.8        0.83333333 0.8        0.83333333
+ 1.         0.83333333 0.71428571 1.        ]
 
-mean value: 0.8314285714285714
+mean value: 0.8361904761904763
 
 key: train_jcc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -24679,68 +24885,68 @@ Running model pipeline: Pipeline(steps=[('prep',
                                    random_state=42))])
 
 key: fit_time 
-value: [0.03332639 0.03159523 0.0316155  0.02976799 0.02722383 0.02606964
- 0.02668548 0.0271318  0.03500819 0.02735019]
+value: [0.02699709 0.03070116 0.03277135 0.05133557 0.02615666 0.02731848
+ 0.047997   0.03133965 0.03537989 0.04155445]
 
-mean value: 0.029577422142028808
+mean value: 0.03515512943267822
 
 key: score_time 
-value: [0.01860285 0.01750827 0.02369905 0.02393627 0.01593733 0.01683497
- 0.01794434 0.0177505  0.02326179 0.02694941]
+value: [0.02213979 0.02183247 0.03555799 0.03083539 0.01692629 0.02012992
+ 0.01762462 0.02948856 0.03649259 0.02368855]
 
-mean value: 0.020242476463317872
+mean value: 0.0254716157913208
 
 key: test_mcc 
-value: [0.65465367 1.         0.81649658 0.65465367 0.81649658 1.
- 0.81649658 0.81649658 0.81649658 0.81649658]
+value: [0.81649658 0.65465367 0.81649658 0.81649658 1.         1.
+ 1.         0.81649658 0.81649658 1.        ]
 
-mean value: 0.8208286826982311
+mean value: 0.8737136575346607
 
 key: train_mcc 
-value: [1.         1.         0.97801929 1.         1.         0.97801929
- 1.         0.97801929 1.         1.        ]
+value: [1.         1.         1.         1.         0.97801929 1.
+ 1.         1.         0.97801929 1.        ]
 
-mean value: 0.9934057881530954
+mean value: 0.9956038587687303
 
 key: test_accuracy 
-value: [0.8 1.  0.9 0.8 0.9 1.  0.9 0.9 0.9 0.9]
+value: [0.9 0.8 0.9 0.9 1.  1.  1.  0.9 0.9 1. ]
 
-mean value: 0.9
+mean value: 0.93
 
 key: train_accuracy 
-value: [1.         1.         0.98888889 1.         1.         0.98888889
- 1.         0.98888889 1.         1.        ]
+value: [1.         1.         1.         1.         0.98888889 1.
+ 1.         1.         0.98888889 1.        ]
 
-mean value: 0.9966666666666667
+mean value: 0.9977777777777778
 
 key: test_fscore 
-value: [0.83333333 1.         0.90909091 0.83333333 0.88888889 1.
- 0.88888889 0.90909091 0.88888889 0.90909091]
+value: [0.90909091 0.83333333 0.88888889 0.90909091 1.         1.
+ 1.         0.90909091 0.90909091 1.        ]
 
-mean value: 0.906060606060606
+mean value: 0.9358585858585858
 
 key: train_fscore 
-value: [1.         1.         0.98901099 1.         1.         0.98901099
- 1.         0.98901099 1.         1.        ]
+value: [1.         1.         1.         1.         0.98901099 1.
+ 1.         1.         0.98901099 1.        ]
 
-mean value: 0.9967032967032967
+mean value: 0.9978021978021978
 
 key: test_precision 
-value: [0.71428571 1.         0.83333333 0.71428571 1.         1.
- 1.         0.83333333 1.         0.83333333]
+value: [0.83333333 0.71428571 1.         0.83333333 1.         1.
+ 1.         0.83333333 0.83333333 1.        ]
 
-mean value: 0.8928571428571429
+mean value: 0.9047619047619048
 
 key: train_precision 
-value: [1.         1.         0.97826087 1.         1.         0.97826087
- 1.         0.97826087 1.         1.        ]
+value: [1.         1.         1.         1.         0.97826087 1.
+ 1.         1.         0.97826087 1.        ]
 
-mean value: 0.9934782608695653
+mean value: 0.9956521739130435
 
 key: test_recall 
-value: [1.  1.  1.  1.  0.8 1.  0.8 1.  0.8 1. ]
+value: [1.  1.  0.8 1.  1.  1.  1.  1.  1.  1. ]
 
-mean value: 0.9400000000000001
+mean value: 0.98
 
 key: train_recall 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -24748,31 +24954,31 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_roc_auc 
-value: [0.8 1.  0.9 0.8 0.9 1.  0.9 0.9 0.9 0.9]
+value: [0.9 0.8 0.9 0.9 1.  1.  1.  0.9 0.9 1. ]
 
-mean value: 0.9
+mean value: 0.93
 
 key: train_roc_auc 
-value: [1.         1.         0.98888889 1.         1.         0.98888889
- 1.         0.98888889 1.         1.        ]
+value: [1.         1.         1.         1.         0.98888889 1.
+ 1.         1.         0.98888889 1.        ]
 
-mean value: 0.9966666666666666
+mean value: 0.9977777777777778
 
 key: test_jcc 
-value: [0.71428571 1.         0.83333333 0.71428571 0.8        1.
- 0.8        0.83333333 0.8        0.83333333]
+value: [0.83333333 0.71428571 0.8        0.83333333 1.         1.
+ 1.         0.83333333 0.83333333 1.        ]
 
-mean value: 0.8328571428571429
+mean value: 0.8847619047619047
 
 key: train_jcc 
-value: [1.         1.         0.97826087 1.         1.         0.97826087
- 1.         0.97826087 1.         1.        ]
+value: [1.         1.         1.         1.         0.97826087 1.
+ 1.         1.         0.97826087 1.        ]
 
-mean value: 0.9934782608695653
+mean value: 0.9956521739130435
 
-MCC on Blind test: 0.95
+MCC on Blind test: 0.89
 
-Accuracy on Blind test: 0.98
+Accuracy on Blind test: 0.95
 
 Model_name: Gaussian Process 
 Model func: GaussianProcessClassifier(random_state=42) 
@@ -24805,22 +25011,22 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', GaussianProcessClassifier(random_state=42))])
 
 key: fit_time 
-value: [0.01528788 0.01607227 0.02439117 0.01962614 0.01726866 0.01696539
- 0.01694345 0.01618481 0.01628375 0.01687813]
+value: [0.01359749 0.01574779 0.01587391 0.02081776 0.01601291 0.01616716
+ 0.01599646 0.01598001 0.01611137 0.01597762]
 
-mean value: 0.01759016513824463
+mean value: 0.016228246688842773
 
 key: score_time 
-value: [0.01140237 0.01114202 0.01187825 0.0118506  0.01176071 0.0119977
- 0.01188946 0.01196241 0.01185942 0.01186037]
+value: [0.01142287 0.01155448 0.01172853 0.01179075 0.01171851 0.01176023
+ 0.01172566 0.01173353 0.01173139 0.01173902]
 
-mean value: 0.011760330200195313
+mean value: 0.011690497398376465
 
 key: test_mcc 
-value: [0.6        0.65465367 0.81649658 0.21821789 0.6        0.81649658
- 0.40824829 0.5        0.40824829 0.40824829]
+value: [0.81649658 0.65465367 0.40824829 0.6        0.5        0.40824829
+ 0.40824829 0.40824829 0.6        0.81649658]
 
-mean value: 0.543060959419101
+mean value: 0.5620639994418881
 
 key: train_mcc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -24828,9 +25034,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_accuracy 
-value: [0.8 0.8 0.9 0.6 0.8 0.9 0.7 0.7 0.7 0.7]
+value: [0.9 0.8 0.7 0.8 0.7 0.7 0.7 0.7 0.8 0.9]
 
-mean value: 0.76
+mean value: 0.77
 
 key: train_accuracy 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -24838,10 +25044,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_fscore 
-value: [0.8        0.83333333 0.90909091 0.66666667 0.8        0.88888889
- 0.66666667 0.76923077 0.72727273 0.66666667]
+value: [0.90909091 0.83333333 0.66666667 0.8        0.57142857 0.72727273
+ 0.72727273 0.72727273 0.8        0.90909091]
 
-mean value: 0.7727816627816628
+mean value: 0.7671428571428571
 
 key: train_fscore 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -24849,10 +25055,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_precision 
-value: [0.8        0.71428571 0.83333333 0.57142857 0.8        1.
- 0.75       0.625      0.66666667 0.75      ]
+value: [0.83333333 0.71428571 0.75       0.8        1.         0.66666667
+ 0.66666667 0.66666667 0.8        0.83333333]
 
-mean value: 0.7510714285714286
+mean value: 0.7730952380952381
 
 key: train_precision 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -24860,9 +25066,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_recall 
-value: [0.8 1.  1.  0.8 0.8 0.8 0.6 1.  0.8 0.6]
+value: [1.  1.  0.6 0.8 0.4 0.8 0.8 0.8 0.8 1. ]
 
-mean value: 0.8200000000000001
+mean value: 0.8
 
 key: train_recall 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -24870,9 +25076,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_roc_auc 
-value: [0.8 0.8 0.9 0.6 0.8 0.9 0.7 0.7 0.7 0.7]
+value: [0.9 0.8 0.7 0.8 0.7 0.7 0.7 0.7 0.8 0.9]
 
-mean value: 0.76
+mean value: 0.77
 
 key: train_roc_auc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -24880,19 +25086,19 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_jcc 
-value: [0.66666667 0.71428571 0.83333333 0.5        0.66666667 0.8
- 0.5        0.625      0.57142857 0.5       ]
+value: [0.83333333 0.71428571 0.5        0.66666667 0.4        0.57142857
+ 0.57142857 0.57142857 0.66666667 0.83333333]
 
-mean value: 0.6377380952380952
+mean value: 0.6328571428571429
 
 key: train_jcc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 
 mean value: 1.0
 
-MCC on Blind test: 0.16
+MCC on Blind test: 0.01
 
-Accuracy on Blind test: 0.62
+Accuracy on Blind test: 0.52
 
 Model_name: Gradient Boosting 
 Model func: GradientBoostingClassifier(random_state=42) 
@@ -24949,22 +25155,22 @@ Pipeline(steps=[('prep',
                 ('model', GradientBoostingClassifier(random_state=42))])
 
 key: fit_time 
-value: [0.17597413 0.1721468  0.17924476 0.17421913 0.17393565 0.17166734
- 0.14058495 0.17408228 0.17574048 0.17127013]
+value: [0.22217441 0.19093752 0.1718936  0.16672945 0.18235731 0.17839694
+ 0.1745894  0.18664312 0.18885684 0.19141936]
 
-mean value: 0.17088656425476073
+mean value: 0.18539979457855224
 
 key: score_time 
-value: [0.00916481 0.00947452 0.00913882 0.00930166 0.00918031 0.00899935
- 0.0091269  0.00965333 0.00991249 0.00923991]
+value: [0.00920916 0.00894237 0.00916672 0.00911665 0.00934219 0.00979352
+ 0.0089221  0.00914454 0.00932813 0.00907612]
 
-mean value: 0.009319210052490234
+mean value: 0.00920414924621582
 
 key: test_mcc 
-value: [0.81649658 1.         0.81649658 1.         0.6        1.
- 0.81649658 0.81649658 0.81649658 0.81649658]
+value: [0.81649658 0.81649658 0.81649658 0.65465367 0.65465367 1.
+ 1.         0.81649658 0.81649658 0.81649658]
 
-mean value: 0.8498979485566356
+mean value: 0.8208286826982311
 
 key: train_mcc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -24972,9 +25178,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_accuracy 
-value: [0.9 1.  0.9 1.  0.8 1.  0.9 0.9 0.9 0.9]
+value: [0.9 0.9 0.9 0.8 0.8 1.  1.  0.9 0.9 0.9]
 
-mean value: 0.92
+mean value: 0.9
 
 key: train_accuracy 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -24982,10 +25188,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_fscore 
-value: [0.90909091 1.         0.90909091 1.         0.8        1.
- 0.88888889 0.90909091 0.88888889 0.90909091]
+value: [0.90909091 0.90909091 0.88888889 0.83333333 0.75       1.
+ 1.         0.90909091 0.90909091 0.88888889]
 
-mean value: 0.9214141414141415
+mean value: 0.8997474747474747
 
 key: train_fscore 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -24993,10 +25199,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_precision 
-value: [0.83333333 1.         0.83333333 1.         0.8        1.
- 1.         0.83333333 1.         0.83333333]
+value: [0.83333333 0.83333333 1.         0.71428571 1.         1.
+ 1.         0.83333333 0.83333333 1.        ]
 
-mean value: 0.9133333333333333
+mean value: 0.9047619047619048
 
 key: train_precision 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -25004,9 +25210,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_recall 
-value: [1.  1.  1.  1.  0.8 1.  0.8 1.  0.8 1. ]
+value: [1.  1.  0.8 1.  0.6 1.  1.  1.  1.  0.8]
 
-mean value: 0.9400000000000001
+mean value: 0.92
 
 key: train_recall 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -25014,9 +25220,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_roc_auc 
-value: [0.9 1.  0.9 1.  0.8 1.  0.9 0.9 0.9 0.9]
+value: [0.9 0.9 0.9 0.8 0.8 1.  1.  0.9 0.9 0.9]
 
-mean value: 0.92
+mean value: 0.9
 
 key: train_roc_auc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -25024,19 +25230,19 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_jcc 
-value: [0.83333333 1.         0.83333333 1.         0.66666667 1.
- 0.8        0.83333333 0.8        0.83333333]
+value: [0.83333333 0.83333333 0.8        0.71428571 0.6        1.
+ 1.         0.83333333 0.83333333 0.8       ]
 
-mean value: 0.86
+mean value: 0.8247619047619048
 
 key: train_jcc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 
 mean value: 1.0
 
-MCC on Blind test: 0.95
+MCC on Blind test: 0.84
 
-Accuracy on Blind test: 0.98
+Accuracy on Blind test: 0.92
 
 Model_name: QDA 
 Model func: QuadraticDiscriminantAnalysis() 
@@ -25069,22 +25275,22 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', QuadraticDiscriminantAnalysis())])
 
 key: fit_time 
-value: [0.01125002 0.01427794 0.01427984 0.01602817 0.01415706 0.01421785
- 0.0143075  0.01926303 0.01412845 0.01442242]
+value: [0.01358581 0.01411629 0.01471233 0.01414609 0.02364945 0.02104473
+ 0.01440454 0.01459599 0.01464772 0.01447725]
 
-mean value: 0.01463322639465332
+mean value: 0.01593801975250244
 
 key: score_time 
-value: [0.01145935 0.01170135 0.01169324 0.01170659 0.01228118 0.01166224
- 0.01343036 0.01290202 0.01216722 0.01210403]
+value: [0.01189876 0.01186109 0.01198363 0.0119555  0.01537371 0.01311707
+ 0.01196265 0.01466823 0.01172829 0.01508021]
 
-mean value: 0.012110757827758788
+mean value: 0.012962913513183594
 
 key: test_mcc 
-value: [0.81649658 1.         1.         0.81649658 0.81649658 0.65465367
- 0.5        0.81649658 0.65465367 0.33333333]
+value: [1.         1.         0.5        0.81649658 0.5        0.81649658
+ 0.81649658 0.81649658 0.81649658 0.65465367]
 
-mean value: 0.7408626998460192
+mean value: 0.7737136575346607
 
 key: train_mcc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -25092,9 +25298,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_accuracy 
-value: [0.9 1.  1.  0.9 0.9 0.8 0.7 0.9 0.8 0.6]
+value: [1.  1.  0.7 0.9 0.7 0.9 0.9 0.9 0.9 0.8]
 
-mean value: 0.85
+mean value: 0.87
 
 key: train_accuracy 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -25102,10 +25308,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_fscore 
-value: [0.88888889 1.         1.         0.88888889 0.88888889 0.75
- 0.57142857 0.88888889 0.75       0.33333333]
+value: [1.         1.         0.57142857 0.88888889 0.57142857 0.88888889
+ 0.88888889 0.88888889 0.88888889 0.75      ]
 
-mean value: 0.7960317460317461
+mean value: 0.8337301587301588
 
 key: train_fscore 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -25123,9 +25329,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_recall 
-value: [0.8 1.  1.  0.8 0.8 0.6 0.4 0.8 0.6 0.2]
+value: [1.  1.  0.4 0.8 0.4 0.8 0.8 0.8 0.8 0.6]
 
-mean value: 0.7000000000000001
+mean value: 0.74
 
 key: train_recall 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -25133,9 +25339,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_roc_auc 
-value: [0.9 1.  1.  0.9 0.9 0.8 0.7 0.9 0.8 0.6]
+value: [1.  1.  0.7 0.9 0.7 0.9 0.9 0.9 0.9 0.8]
 
-mean value: 0.85
+mean value: 0.87
 
 key: train_roc_auc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -25143,9 +25349,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 mean value: 1.0
 
 key: test_jcc 
-value: [0.8 1.  1.  0.8 0.8 0.6 0.4 0.8 0.6 0.2]
+value: [1.  1.  0.4 0.8 0.4 0.8 0.8 0.8 0.8 0.6]
 
-mean value: 0.7000000000000001
+mean value: 0.74
 
 key: train_jcc 
 value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
@@ -25187,101 +25393,100 @@ Running model pipeline: Pipeline(steps=[('prep',
                 ('model', RidgeClassifier(random_state=42))])
 
 key: fit_time 
-value: [0.03190494 0.0162518  0.01298714 0.02096915 0.02571321 0.07055092
- 0.01573253 0.01286387 0.01908612 0.01534081]
+value: [0.0334847  0.03210235 0.03213644 0.03319669 0.03299642 0.03233814
+ 0.0345664  0.03212976 0.03434682 0.03329611]
 
-mean value: 0.024140048027038574
+mean value: 0.03305938243865967
 
 key: score_time 
-value: [0.0120213  0.01187372 0.01150703 0.01177883 0.0234704  0.01707196
- 0.01153183 0.01159644 0.01922894 0.01779103]
+value: [0.01982021 0.02007699 0.02197194 0.02256417 0.02357554 0.0200212
+ 0.0116353  0.02040458 0.02340674 0.0233531 ]
 
-mean value: 0.01478714942932129
+mean value: 0.020682978630065917
 
 key: test_mcc 
-value: [0.81649658 0.81649658 0.81649658 0.40824829 0.6        0.65465367
- 0.65465367 0.81649658 0.81649658 0.81649658]
+value: [0.65465367 0.81649658 0.40824829 0.6        0.81649658 0.5
+ 0.81649658 0.65465367 0.6        1.        ]
 
-mean value: 0.7216535117446173
+mean value: 0.6867045374662996
 
 key: train_mcc 
-value: [0.97801929 0.97801929 0.97801929 1.         1.         0.97801929
- 0.95555556 0.97801929 1.         0.97801929]
+value: [1.         0.97801929 0.97801929 0.97801929 0.97801929 0.97801929
+ 0.97801929 0.97801929 0.97801929 0.97801929]
 
-mean value: 0.9823671318617464
+mean value: 0.9802173644592863
 
 key: test_accuracy 
-value: [0.9 0.9 0.9 0.7 0.8 0.8 0.8 0.9 0.9 0.9]
+value: [0.8 0.9 0.7 0.8 0.9 0.7 0.9 0.8 0.8 1. ]
 
-mean value: 0.85
+mean value: 0.8300000000000001
 
 key: train_accuracy 
-value: [0.98888889 0.98888889 0.98888889 1.         1.         0.98888889
- 0.97777778 0.98888889 1.         0.98888889]
+value: [1.         0.98888889 0.98888889 0.98888889 0.98888889 0.98888889
+ 0.98888889 0.98888889 0.98888889 0.98888889]
 
-mean value: 0.9911111111111112
+mean value: 0.99
 
 key: test_fscore 
-value: [0.90909091 0.90909091 0.90909091 0.72727273 0.8        0.83333333
- 0.83333333 0.90909091 0.88888889 0.90909091]
+value: [0.83333333 0.90909091 0.66666667 0.8        0.90909091 0.76923077
+ 0.88888889 0.83333333 0.8        1.        ]
 
-mean value: 0.8628282828282828
+mean value: 0.840963480963481
 
 key: train_fscore 
-value: [0.98901099 0.98901099 0.98901099 1.         1.         0.98901099
- 0.97777778 0.98901099 1.         0.98901099]
+value: [1.         0.98901099 0.98901099 0.98901099 0.98901099 0.98901099
+ 0.98901099 0.98901099 0.98901099 0.98901099]
 
-mean value: 0.9911843711843712
+mean value: 0.9901098901098901
 
 key: test_precision 
-value: [0.83333333 0.83333333 0.83333333 0.66666667 0.8        0.71428571
- 0.71428571 0.83333333 1.         0.83333333]
+value: [0.71428571 0.83333333 0.75       0.8        0.83333333 0.625
+ 1.         0.71428571 0.8        1.        ]
 
-mean value: 0.8061904761904762
+mean value: 0.8070238095238096
 
 key: train_precision 
-value: [0.97826087 0.97826087 0.97826087 1.         1.         0.97826087
- 0.97777778 0.97826087 1.         0.97826087]
+value: [1.         0.97826087 0.97826087 0.97826087 0.97826087 0.97826087
+ 0.97826087 0.97826087 0.97826087 0.97826087]
 
-mean value: 0.9847342995169082
+mean value: 0.9804347826086957
 
 key: test_recall 
-value: [1.  1.  1.  0.8 0.8 1.  1.  1.  0.8 1. ]
+value: [1.  1.  0.6 0.8 1.  1.  0.8 1.  0.8 1. ]
 
-mean value: 0.9400000000000001
+mean value: 0.9
 
 key: train_recall 
-value: [1.         1.         1.         1.         1.         1.
- 0.97777778 1.         1.         1.        ]
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 
-mean value: 0.9977777777777778
+mean value: 1.0
 
 key: test_roc_auc 
-value: [0.9 0.9 0.9 0.7 0.8 0.8 0.8 0.9 0.9 0.9]
+value: [0.8 0.9 0.7 0.8 0.9 0.7 0.9 0.8 0.8 1. ]
 
-mean value: 0.8500000000000001
+mean value: 0.8300000000000001
 
 key: train_roc_auc 
-value: [0.98888889 0.98888889 0.98888889 1.         1.         0.98888889
- 0.97777778 0.98888889 1.         0.98888889]
+value: [1.         0.98888889 0.98888889 0.98888889 0.98888889 0.98888889
+ 0.98888889 0.98888889 0.98888889 0.98888889]
 
-mean value: 0.991111111111111
+mean value: 0.99
 
 key: test_jcc 
-value: [0.83333333 0.83333333 0.83333333 0.57142857 0.66666667 0.71428571
- 0.71428571 0.83333333 0.8        0.83333333]
+value: [0.71428571 0.83333333 0.5        0.66666667 0.83333333 0.625
+ 0.8        0.71428571 0.66666667 1.        ]
 
-mean value: 0.7633333333333333
+mean value: 0.7353571428571428
 
 key: train_jcc 
-value: [0.97826087 0.97826087 0.97826087 1.         1.         0.97826087
- 0.95652174 0.97826087 1.         0.97826087]
+value: [1.         0.97826087 0.97826087 0.97826087 0.97826087 0.97826087
+ 0.97826087 0.97826087 0.97826087 0.97826087]
 
-mean value: 0.9826086956521739
+mean value: 0.9804347826086957
 
-MCC on Blind test: 0.48
+MCC on Blind test: 0.37
 
-Accuracy on Blind test: 0.78
+Accuracy on Blind test: 0.72
 
 Model_name: Ridge ClassifierCV 
 Model func: RidgeClassifierCV(cv=10) 
@@ -25297,12 +25502,12 @@ List of models: [('Logistic Regression', LogisticRegression(random_state=42)), (
               reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
               tree_method='exact', use_label_encoder=False,
               validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
-Running model pipeline: /home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:188: SettingWithCopyWarning: 
+Running model pipeline: /home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:196: SettingWithCopyWarning: 
 A value is trying to be set on a copy of a slice from a DataFrame
 
 See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
   rouC_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
-/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:191: SettingWithCopyWarning: 
+/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:199: SettingWithCopyWarning: 
 A value is trying to be set on a copy of a slice from a DataFrame
 
 See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
@@ -25324,98 +25529,97 @@ Pipeline(steps=[('prep',
                 ('model', RidgeClassifierCV(cv=10))])
 
 key: fit_time 
-value: [0.26709867 0.15390754 0.17872667 0.26606345 0.15658689 0.17817855
- 0.16672301 0.15290213 0.25758982 0.18749452]
+value: [0.10436153 0.10901594 0.15473795 0.18076968 0.18270421 0.19743443
+ 0.19927812 0.20212865 0.22301292 0.22878146]
 
-mean value: 0.1965271234512329
+mean value: 0.17822248935699464
 
 key: score_time 
-value: [0.0125277  0.01932573 0.02402663 0.02339983 0.02723026 0.01232576
- 0.02062273 0.02069354 0.02367878 0.02091861]
+value: [0.0118041  0.02067137 0.01172853 0.02039146 0.02330875 0.02281046
+ 0.02109528 0.02306652 0.0215745  0.02139878]
 
-mean value: 0.020474958419799804
+mean value: 0.019784975051879882
 
 key: test_mcc 
-value: [0.81649658 0.81649658 0.81649658 0.40824829 0.40824829 0.65465367
- 0.65465367 0.81649658 0.81649658 0.65465367]
+value: [0.65465367 0.81649658 0.40824829 0.6        0.81649658 0.5
+ 0.81649658 0.65465367 0.21821789 1.        ]
 
-mean value: 0.6862940497690287
+mean value: 0.6485263264898988
 
 key: train_mcc 
-value: [0.97801929 0.97801929 0.97801929 1.         1.         0.97801929
- 0.95555556 0.97801929 1.         1.        ]
+value: [1.         0.97801929 0.97801929 0.97801929 0.97801929 0.97801929
+ 0.97801929 0.97801929 1.         0.97801929]
 
-mean value: 0.9845652024773813
+mean value: 0.9824154350749212
 
 key: test_accuracy 
-value: [0.9 0.9 0.9 0.7 0.7 0.8 0.8 0.9 0.9 0.8]
+value: [0.8 0.9 0.7 0.8 0.9 0.7 0.9 0.8 0.6 1. ]
 
-mean value: 0.8300000000000001
+mean value: 0.81
 
 key: train_accuracy 
-value: [0.98888889 0.98888889 0.98888889 1.         1.         0.98888889
- 0.97777778 0.98888889 1.         1.        ]
+value: [1.         0.98888889 0.98888889 0.98888889 0.98888889 0.98888889
+ 0.98888889 0.98888889 1.         0.98888889]
 
-mean value: 0.9922222222222222
+mean value: 0.9911111111111112
 
 key: test_fscore 
-value: [0.90909091 0.90909091 0.90909091 0.72727273 0.72727273 0.83333333
- 0.83333333 0.90909091 0.88888889 0.83333333]
+value: [0.83333333 0.90909091 0.66666667 0.8        0.90909091 0.76923077
+ 0.88888889 0.83333333 0.66666667 1.        ]
 
-mean value: 0.847979797979798
+mean value: 0.8276301476301476
 
 key: train_fscore 
-value: [0.98901099 0.98901099 0.98901099 1.         1.         0.98901099
- 0.97777778 0.98901099 1.         1.        ]
+value: [1.         0.98901099 0.98901099 0.98901099 0.98901099 0.98901099
+ 0.98901099 0.98901099 1.         0.98901099]
 
-mean value: 0.9922832722832723
+mean value: 0.9912087912087912
 
 key: test_precision 
-value: [0.83333333 0.83333333 0.83333333 0.66666667 0.66666667 0.71428571
- 0.71428571 0.83333333 1.         0.71428571]
+value: [0.71428571 0.83333333 0.75       0.8        0.83333333 0.625
+ 1.         0.71428571 0.57142857 1.        ]
 
-mean value: 0.780952380952381
+mean value: 0.7841666666666667
 
 key: train_precision 
-value: [0.97826087 0.97826087 0.97826087 1.         1.         0.97826087
- 0.97777778 0.97826087 1.         1.        ]
+value: [1.         0.97826087 0.97826087 0.97826087 0.97826087 0.97826087
+ 0.97826087 0.97826087 1.         0.97826087]
 
-mean value: 0.9869082125603865
+mean value: 0.9826086956521739
 
 key: test_recall 
-value: [1.  1.  1.  0.8 0.8 1.  1.  1.  0.8 1. ]
+value: [1.  1.  0.6 0.8 1.  1.  0.8 1.  0.8 1. ]
 
-mean value: 0.9400000000000001
+mean value: 0.9
 
 key: train_recall 
-value: [1.         1.         1.         1.         1.         1.
- 0.97777778 1.         1.         1.        ]
+value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 
-mean value: 0.9977777777777778
+mean value: 1.0
 
 key: test_roc_auc 
-value: [0.9 0.9 0.9 0.7 0.7 0.8 0.8 0.9 0.9 0.8]
+value: [0.8 0.9 0.7 0.8 0.9 0.7 0.9 0.8 0.6 1. ]
 
-mean value: 0.8300000000000001
+mean value: 0.81
 
 key: train_roc_auc 
-value: [0.98888889 0.98888889 0.98888889 1.         1.         0.98888889
- 0.97777778 0.98888889 1.         1.        ]
+value: [1.         0.98888889 0.98888889 0.98888889 0.98888889 0.98888889
+ 0.98888889 0.98888889 1.         0.98888889]
 
-mean value: 0.9922222222222222
+mean value: 0.991111111111111
 
 key: test_jcc 
-value: [0.83333333 0.83333333 0.83333333 0.57142857 0.57142857 0.71428571
- 0.71428571 0.83333333 0.8        0.71428571]
+value: [0.71428571 0.83333333 0.5        0.66666667 0.83333333 0.625
+ 0.8        0.71428571 0.5        1.        ]
 
-mean value: 0.741904761904762
+mean value: 0.7186904761904762
 
 key: train_jcc 
-value: [0.97826087 0.97826087 0.97826087 1.         1.         0.97826087
- 0.95652174 0.97826087 1.         1.        ]
+value: [1.         0.97826087 0.97826087 0.97826087 0.97826087 0.97826087
+ 0.97826087 0.97826087 1.         0.97826087]
 
-mean value: 0.9847826086956522
+mean value: 0.9826086956521739
 
-MCC on Blind test: 0.48
+MCC on Blind test: 0.37
 
-Accuracy on Blind test: 0.78
+Accuracy on Blind test: 0.72
diff --git a/scripts/ml/ml_data.py b/scripts/ml/ml_data.py
index 488c549..62eba26 100644
--- a/scripts/ml/ml_data.py
+++ b/scripts/ml/ml_data.py
@@ -5,706 +5,726 @@ Created on Sun Mar  6 13:41:54 2022
 
 @author: tanu
 """
-def setvars(gene,drug):
-    #https://stackoverflow.com/questions/51695322/compare-multiple-algorithms-with-sklearn-pipeline
-    import os, sys
-    import pandas as pd
-    import numpy as np
-    print(np.__version__)
-    print(pd.__version__)
-    import pprint as pp
-    from copy import deepcopy
-    from collections import Counter
-    from sklearn.impute import KNNImputer as KNN
-    from imblearn.over_sampling import RandomOverSampler
-    from imblearn.under_sampling import RandomUnderSampler
-    from imblearn.over_sampling import SMOTE
-    from sklearn.datasets import make_classification
-    from imblearn.combine import SMOTEENN
-    from imblearn.combine import SMOTETomek
-    
-    from imblearn.over_sampling import SMOTENC
-    from imblearn.under_sampling import EditedNearestNeighbours
-    from imblearn.under_sampling import RepeatedEditedNearestNeighbours
-    
-    from sklearn.metrics import make_scorer, confusion_matrix, accuracy_score, balanced_accuracy_score, precision_score, average_precision_score, recall_score
-    from sklearn.metrics import roc_auc_score, roc_curve, f1_score, matthews_corrcoef, jaccard_score, classification_report
-    
-    from sklearn.model_selection import train_test_split, cross_validate, cross_val_score
-    from sklearn.model_selection import StratifiedKFold,RepeatedStratifiedKFold, RepeatedKFold
-    
-    from sklearn.pipeline import Pipeline, make_pipeline
-    #%% GLOBALS
-    rs = {'random_state': 42}
-    njobs = {'n_jobs': 10}
-    
-    scoring_fn =  ({ 'mcc'         : make_scorer(matthews_corrcoef)
-                    , 'accuracy'   : make_scorer(accuracy_score)
-                    , 'fscore'     : make_scorer(f1_score)
-                    , 'precision'  : make_scorer(precision_score)
-                    , 'recall'     : make_scorer(recall_score)
-                    , 'roc_auc'    : make_scorer(roc_auc_score)
-                    , 'jcc'        : make_scorer(jaccard_score)
-                }) 
-      
-    skf_cv = StratifiedKFold(n_splits = 10
-                              #, shuffle = False, random_state= None)
-                               , shuffle = True,**rs)
-    
-    rskf_cv = RepeatedStratifiedKFold(n_splits = 10
-                                      , n_repeats = 3
-                                      , **rs)
-    
-    mcc_score_fn  = {'mcc': make_scorer(matthews_corrcoef)}
-    jacc_score_fn = {'jcc': make_scorer(jaccard_score)}
-    
-    #%% FOR LATER: Combine ED logo data
-    #%% DONE: active aa site annotations **DONE on 15/05/2022 as part of generating merged_dfs
-    ###########################################################################
-    rs = {'random_state': 42}
-    njobs = {'n_jobs': 10}
-    homedir = os.path.expanduser("~")
-    
-    geneL_basic     = ['pnca']
-    geneL_na        = ['gid']
-    geneL_na_ppi2   = ['rpob']
-    geneL_ppi2      = ['alr', 'embb', 'katg']
-    
-    #num_type = ['int64', 'float64']
-    num_type = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
-    cat_type = ['object', 'bool']
-    
-    #==============
-    # directories
-    #==============
-    datadir = homedir + '/git/Data/'
-    indir   = datadir + drug + '/input/'
-    outdir  = datadir + drug + '/output/'
-    
-    #=======
-    # input
-    #=======
-    
-    #---------
-    # File 1
-    #---------
-    infile_ml1 = outdir + gene.lower() + '_merged_df3.csv' 
-    #infile_ml2 = outdir + gene.lower() + '_merged_df2.csv'
-    
-    my_features_df = pd.read_csv(infile_ml1, index_col = 0) 
-    my_features_df  = my_features_df .reset_index(drop = True)
-    my_features_df.index
-    
-    my_features_df.dtypes
-    mycols = my_features_df.columns
-    
-    #---------
-    # File 2
-    #---------
-    infile_aaindex = outdir + 'aa_index/' + gene.lower() + '_aa.csv' 
-    aaindex_df = pd.read_csv(infile_aaindex, index_col = 0) 
-    aaindex_df.dtypes
-    
-    #-----------
-    # check for non-numerical columns
-    #-----------
-    if any(aaindex_df.dtypes==object):
-        print('\naaindex_df contains non-numerical data')
-    
-    aaindex_df_object = aaindex_df.select_dtypes(include = cat_type)
-    print('\nTotal no. of non-numerial columns:', len(aaindex_df_object.columns))
-    
-    expected_aa_ncols = len(aaindex_df.columns) - len(aaindex_df_object.columns)
+#def setvars(gene,drug):
+#https://stackoverflow.com/questions/51695322/compare-multiple-algorithms-with-sklearn-pipeline
+import os, sys
+import pandas as pd
+import numpy as np
+print(np.__version__)
+print(pd.__version__)
+import pprint as pp
+from copy import deepcopy
+from collections import Counter
+from sklearn.impute import KNNImputer as KNN
+from imblearn.over_sampling import RandomOverSampler
+from imblearn.under_sampling import RandomUnderSampler
+from imblearn.over_sampling import SMOTE
+from sklearn.datasets import make_classification
+from imblearn.combine import SMOTEENN
+from imblearn.combine import SMOTETomek
 
-    #-----------
-    # Extract numerical data only
-    #-----------
-    print('\nSelecting numerical data only')
-    aaindex_df = aaindex_df.select_dtypes(include = num_type)
+from imblearn.over_sampling import SMOTENC
+from imblearn.under_sampling import EditedNearestNeighbours
+from imblearn.under_sampling import RepeatedEditedNearestNeighbours
 
-    #---------------------------
-    # aaindex: sanity check 1
-    #---------------------------
-    if len(aaindex_df.columns) == expected_aa_ncols:
-        print('\nPASS: successfully selected numerical columns only for aaindex_df')
-    else:
-        print('\nFAIL: Numbers mismatch'
-              , '\nExpected ncols:', expected_aa_ncols
-              , '\nGot:', len(aaindex_df.columns))    
-        
-    #---------------
-    # check for NA
-    #---------------
-    print('\nNow checking for NA in the remaining aaindex_cols')
-    c1 = aaindex_df.isna().sum()
-    c2 = c1.sort_values(ascending=False)
-    print('\nCounting aaindex_df cols with NA'
-          , '\nncols with NA:', sum(c2>0), 'columns'
-          , '\nDropping these...'
-          , '\nOriginal ncols:', len(aaindex_df.columns)
-          )
-    aa_df = aaindex_df.dropna(axis=1)
-    
-    print('\nRevised df ncols:', len(aa_df.columns))
-    
-    c3 = aa_df.isna().sum()
-    c4 = c3.sort_values(ascending=False)
-    
-    print('\nChecking NA in revised df...')
-    
-    if sum(c4>0):
-        sys.exit('\nFAIL: aaindex_df still contains cols with NA, please check and drop these before proceeding...')
-    else:
-        print('\nPASS: cols with NA successfully dropped from aaindex_df'
-              , '\nProceeding with combining aa_df with other features_df')
-        
-    #---------------------------
-    # aaindex: sanity check 2
-    #---------------------------
-    expected_aa_ncols2 =  len(aaindex_df.columns) - sum(c2>0)  
-    if len(aa_df.columns) == expected_aa_ncols2:
-        print('\nPASS: ncols match'
-              , '\nExpected ncols:', expected_aa_ncols2
-              , '\nGot:', len(aa_df.columns))
-    else:
-        print('\nFAIL: Numbers mismatch'
-              , '\nExpected ncols:', expected_aa_ncols2
-              , '\nGot:', len(aa_df.columns))            
-        
-    # Important: need this to identify aaindex cols    
-    aa_df_cols = aa_df.columns
-    print('\nTotal no. of columns in clean aa_df:', len(aa_df_cols))
-    
-    ###############################################################################
-    #%% Combining my_features_df and aaindex_df
-    #===========================
-    # Merge my_df + aaindex_df
-    #===========================
-    
-    if aa_df.columns[aa_df.columns.isin(my_features_df.columns)] == my_features_df.columns[my_features_df.columns.isin(aa_df.columns)]:
-        print('\nMerging on column: mutationinformation')   
-    
-    if len(my_features_df) == len(aa_df):
-        expected_nrows = len(my_features_df)
-        print('\nProceeding to merge, expected nrows in merged_df:', expected_nrows)
-    else:
-        sys.exit('\nNrows mismatch, cannot merge. Please check'
-              , '\nnrows my_df:', len(my_features_df)
-              , '\nnrows aa_df:', len(aa_df))
-               
-    #-----------------
-    # Reset index: mutationinformation
-    # Very important for merging
-    #-----------------
-    aa_df = aa_df.reset_index()
-    
-    expected_ncols = len(my_features_df.columns) + len(aa_df.columns) - 1 # for the no. of merging col
+from sklearn.metrics import make_scorer, confusion_matrix, accuracy_score, balanced_accuracy_score, precision_score, average_precision_score, recall_score
+from sklearn.metrics import roc_auc_score, roc_curve, f1_score, matthews_corrcoef, jaccard_score, classification_report
 
-    #-----------------
-    # Merge: my_features_df + aa_df
-    #-----------------
-    merged_df = pd.merge(my_features_df
-                         , aa_df
-                         , on = 'mutationinformation')
-    
-    #---------------------------
-    # aaindex: sanity check 3
-    #---------------------------
-    if len(merged_df.columns) == expected_ncols:
-        print('\nPASS: my_features_df and aa_df successfully combined'
-              , '\nnrows:', len(merged_df)
-              , '\nncols:', len(merged_df.columns))
-    else:
-        sys.exit('\nFAIL: could not combine my_features_df and aa_df'
-                 , '\nCheck dims and merging cols!')
-        
-    #--------
-    # Reassign so downstream code doesn't need to change
-    #--------
-    my_df = merged_df.copy()
-    
-    #%% Data: my_df
-    # Check if non structural pos have crept in
-    # IDEALLY remove from source! But for rpoB do it here
-    # Drop NA where numerical cols have them
-    if gene.lower() in geneL_na_ppi2:
-        #D1148 get rid of
-        na_index = my_df['mutationinformation'].index[my_df['mcsm_na_affinity'].apply(np.isnan)]
-        my_df = my_df.drop(index=na_index)
-    
-    # FIXED: complete data for all muts inc L114M, F115L, V123L, V125I, V131M
-    # if gene.lower() in ['embb']:
-    #     na_index = my_df['mutationinformation'].index[my_df['ligand_distance'].apply(np.isnan)]
-    #     my_df = my_df.drop(index=na_index)
-    
-    # # Sanity check for non-structural positions
-    # print('\nChecking for non-structural postions')
-    # na_index = my_df['mutationinformation'].index[my_df['ligand_distance'].apply(np.isnan)]
-    # if len(na_index) > 0:
-    #     print('\nNon-structural positions detected for gene:', gene.lower()
-    #           , '\nTotal number of these detected:', len(na_index)
-    #           , '\These are at index:', na_index
-    #           , '\nOriginal nrows:', len(my_df)
-    #           , '\nDropping these...')
-    #     my_df = my_df.drop(index=na_index)
-    #     print('\nRevised nrows:', len(my_df))
-    # else:
-    #     print('\nNo non-structural positions detected for gene:', gene.lower()
-    #           , '\nnrows:', len(my_df))
-              
-    
-    ###########################################################################
-    #%% Add lineage calculation columns
-    #FIXME: Check if this can be imported from config?
-    total_mtblineage_uc = 8
-    lineage_colnames = ['lineage_list_all', 'lineage_count_all', 'lineage_count_unique', 'lineage_list_unique', 'lineage_multimode']
-    #bar = my_df[lineage_colnames]
-    my_df['lineage_proportion']      = my_df['lineage_count_unique']/my_df['lineage_count_all']
-    my_df['dist_lineage_proportion'] = my_df['lineage_count_unique']/total_mtblineage_uc
-    ###########################################################################
-    #%% Active site annotation column
-    # change from numberic to categorical
+from sklearn.model_selection import train_test_split, cross_validate, cross_val_score
+from sklearn.model_selection import StratifiedKFold,RepeatedStratifiedKFold, RepeatedKFold
 
-    if my_df['active_site'].dtype in num_type:
-        my_df['active_site'] = my_df['active_site'].astype(object)
-        my_df['active_site'].dtype
-    #%% AA property change
-    #--------------------
-    # Water prop change
-    #--------------------
-    my_df['water_change'] = my_df['wt_prop_water'] + str('_to_') + my_df['mut_prop_water']
-    my_df['water_change'].value_counts()
+from sklearn.pipeline import Pipeline, make_pipeline
+#%% GLOBALS
+rs = {'random_state': 42}
+njobs = {'n_jobs': 10}
+
+scoring_fn =  ({ 'mcc'         : make_scorer(matthews_corrcoef)
+                , 'accuracy'   : make_scorer(accuracy_score)
+                , 'fscore'     : make_scorer(f1_score)
+                , 'precision'  : make_scorer(precision_score)
+                , 'recall'     : make_scorer(recall_score)
+                , 'roc_auc'    : make_scorer(roc_auc_score)
+                , 'jcc'        : make_scorer(jaccard_score)
+            }) 
+  
+skf_cv = StratifiedKFold(n_splits = 10
+                          #, shuffle = False, random_state= None)
+                           , shuffle = True,**rs)
+
+rskf_cv = RepeatedStratifiedKFold(n_splits = 10
+                                  , n_repeats = 3
+                                  , **rs)
+
+mcc_score_fn  = {'mcc': make_scorer(matthews_corrcoef)}
+jacc_score_fn = {'jcc': make_scorer(jaccard_score)}
+
+#%% FOR LATER: Combine ED logo data
+#%% DONE: active aa site annotations **DONE on 15/05/2022 as part of generating merged_dfs
+###########################################################################
+rs = {'random_state': 42}
+njobs = {'n_jobs': 10}
+homedir = os.path.expanduser("~")
+
+geneL_basic     = ['pnca']
+geneL_na        = ['gid']
+geneL_na_ppi2   = ['rpob']
+geneL_ppi2      = ['alr', 'embb', 'katg']
+
+#num_type = ['int64', 'float64']
+num_type = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
+cat_type = ['object', 'bool']
+
+#==============
+# directories
+#==============
+datadir = homedir + '/git/Data/'
+indir   = datadir + drug + '/input/'
+outdir  = datadir + drug + '/output/'
+
+#=======
+# input
+#=======
+
+#---------
+# File 1
+#---------
+infile_ml1 = outdir + gene.lower() + '_merged_df3.csv' 
+#infile_ml2 = outdir + gene.lower() + '_merged_df2.csv'
+
+my_features_df = pd.read_csv(infile_ml1, index_col = 0) 
+my_features_df  = my_features_df .reset_index(drop = True)
+my_features_df.index
+
+my_features_df.dtypes
+mycols = my_features_df.columns
+
+#---------
+# File 2
+#---------
+infile_aaindex = outdir + 'aa_index/' + gene.lower() + '_aa.csv' 
+aaindex_df = pd.read_csv(infile_aaindex, index_col = 0) 
+aaindex_df.dtypes
+
+#-----------
+# check for non-numerical columns
+#-----------
+if any(aaindex_df.dtypes==object):
+    print('\naaindex_df contains non-numerical data')
+
+aaindex_df_object = aaindex_df.select_dtypes(include = cat_type)
+print('\nTotal no. of non-numerial columns:', len(aaindex_df_object.columns))
+
+expected_aa_ncols = len(aaindex_df.columns) - len(aaindex_df_object.columns)
+
+#-----------
+# Extract numerical data only
+#-----------
+print('\nSelecting numerical data only')
+aaindex_df = aaindex_df.select_dtypes(include = num_type)
+
+#---------------------------
+# aaindex: sanity check 1
+#---------------------------
+if len(aaindex_df.columns) == expected_aa_ncols:
+    print('\nPASS: successfully selected numerical columns only for aaindex_df')
+else:
+    print('\nFAIL: Numbers mismatch'
+          , '\nExpected ncols:', expected_aa_ncols
+          , '\nGot:', len(aaindex_df.columns))    
     
-    water_prop_changeD = {
-        'hydrophobic_to_neutral'          : 'change'
-        , 'hydrophobic_to_hydrophobic'    : 'no_change'
-        , 'neutral_to_neutral'            : 'no_change'
-        , 'neutral_to_hydrophobic'        : 'change'
-        , 'hydrophobic_to_hydrophilic'    : 'change'
-        , 'neutral_to_hydrophilic'        : 'change'
-        , 'hydrophilic_to_neutral'        : 'change'
-        , 'hydrophilic_to_hydrophobic'    : 'change'
-        , 'hydrophilic_to_hydrophilic'    : 'no_change'
-    }
+#---------------
+# check for NA
+#---------------
+print('\nNow checking for NA in the remaining aaindex_cols')
+c1 = aaindex_df.isna().sum()
+c2 = c1.sort_values(ascending=False)
+print('\nCounting aaindex_df cols with NA'
+      , '\nncols with NA:', sum(c2>0), 'columns'
+      , '\nDropping these...'
+      , '\nOriginal ncols:', len(aaindex_df.columns)
+      )
+aa_df = aaindex_df.dropna(axis=1)
+
+print('\nRevised df ncols:', len(aa_df.columns))
+
+c3 = aa_df.isna().sum()
+c4 = c3.sort_values(ascending=False)
+
+print('\nChecking NA in revised df...')
+
+if sum(c4>0):
+    sys.exit('\nFAIL: aaindex_df still contains cols with NA, please check and drop these before proceeding...')
+else:
+    print('\nPASS: cols with NA successfully dropped from aaindex_df'
+          , '\nProceeding with combining aa_df with other features_df')
     
-    my_df['water_change'] = my_df['water_change'].map(water_prop_changeD)
-    my_df['water_change'].value_counts()
+#---------------------------
+# aaindex: sanity check 2
+#---------------------------
+expected_aa_ncols2 =  len(aaindex_df.columns) - sum(c2>0)  
+if len(aa_df.columns) == expected_aa_ncols2:
+    print('\nPASS: ncols match'
+          , '\nExpected ncols:', expected_aa_ncols2
+          , '\nGot:', len(aa_df.columns))
+else:
+    print('\nFAIL: Numbers mismatch'
+          , '\nExpected ncols:', expected_aa_ncols2
+          , '\nGot:', len(aa_df.columns))            
     
-    #--------------------
-    # Polarity change
-    #--------------------
-    my_df['polarity_change'] = my_df['wt_prop_polarity'] + str('_to_') + my_df['mut_prop_polarity']
-    my_df['polarity_change'].value_counts()
+# Important: need this to identify aaindex cols    
+aa_df_cols = aa_df.columns
+print('\nTotal no. of columns in clean aa_df:', len(aa_df_cols))
+
+###############################################################################
+#%% Combining my_features_df and aaindex_df
+#===========================
+# Merge my_df + aaindex_df
+#===========================
+
+if aa_df.columns[aa_df.columns.isin(my_features_df.columns)] == my_features_df.columns[my_features_df.columns.isin(aa_df.columns)]:
+    print('\nMerging on column: mutationinformation')   
+
+if len(my_features_df) == len(aa_df):
+    expected_nrows = len(my_features_df)
+    print('\nProceeding to merge, expected nrows in merged_df:', expected_nrows)
+else:
+    sys.exit('\nNrows mismatch, cannot merge. Please check'
+          , '\nnrows my_df:', len(my_features_df)
+          , '\nnrows aa_df:', len(aa_df))
+           
+#-----------------
+# Reset index: mutationinformation
+# Very important for merging
+#-----------------
+aa_df = aa_df.reset_index()
+
+expected_ncols = len(my_features_df.columns) + len(aa_df.columns) - 1 # for the no. of merging col
+
+#-----------------
+# Merge: my_features_df + aa_df
+#-----------------
+merged_df = pd.merge(my_features_df
+                     , aa_df
+                     , on = 'mutationinformation')
+
+#---------------------------
+# aaindex: sanity check 3
+#---------------------------
+if len(merged_df.columns) == expected_ncols:
+    print('\nPASS: my_features_df and aa_df successfully combined'
+          , '\nnrows:', len(merged_df)
+          , '\nncols:', len(merged_df.columns))
+else:
+    sys.exit('\nFAIL: could not combine my_features_df and aa_df'
+             , '\nCheck dims and merging cols!')
     
-    polarity_prop_changeD = {
+#--------
+# Reassign so downstream code doesn't need to change
+#--------
+my_df = merged_df.copy()
+
+#%% Data: my_df
+# Check if non structural pos have crept in
+# IDEALLY remove from source! But for rpoB do it here
+# Drop NA where numerical cols have them
+if gene.lower() in geneL_na_ppi2:
+    #D1148 get rid of
+    na_index = my_df['mutationinformation'].index[my_df['mcsm_na_affinity'].apply(np.isnan)]
+    my_df = my_df.drop(index=na_index)
+
+# FIXED: complete data for all muts inc L114M, F115L, V123L, V125I, V131M
+# if gene.lower() in ['embb']:
+#     na_index = my_df['mutationinformation'].index[my_df['ligand_distance'].apply(np.isnan)]
+#     my_df = my_df.drop(index=na_index)
+
+# # Sanity check for non-structural positions
+# print('\nChecking for non-structural postions')
+# na_index = my_df['mutationinformation'].index[my_df['ligand_distance'].apply(np.isnan)]
+# if len(na_index) > 0:
+#     print('\nNon-structural positions detected for gene:', gene.lower()
+#           , '\nTotal number of these detected:', len(na_index)
+#           , '\These are at index:', na_index
+#           , '\nOriginal nrows:', len(my_df)
+#           , '\nDropping these...')
+#     my_df = my_df.drop(index=na_index)
+#     print('\nRevised nrows:', len(my_df))
+# else:
+#     print('\nNo non-structural positions detected for gene:', gene.lower()
+#           , '\nnrows:', len(my_df))
+          
+
+###########################################################################
+#%% Add lineage calculation columns
+#FIXME: Check if this can be imported from config?
+total_mtblineage_uc = 8
+lineage_colnames = ['lineage_list_all', 'lineage_count_all', 'lineage_count_unique', 'lineage_list_unique', 'lineage_multimode']
+#bar = my_df[lineage_colnames]
+my_df['lineage_proportion']      = my_df['lineage_count_unique']/my_df['lineage_count_all']
+my_df['dist_lineage_proportion'] = my_df['lineage_count_unique']/total_mtblineage_uc
+###########################################################################
+#%% Active site annotation column
+# change from numeric to categorical
+
+if my_df['active_site'].dtype in num_type:
+    my_df['active_site'] = my_df['active_site'].astype(object)
+    my_df['active_site'].dtype
+#%% AA property change
+#--------------------
+# Water prop change
+#--------------------
+my_df['water_change'] = my_df['wt_prop_water'] + str('_to_') + my_df['mut_prop_water']
+my_df['water_change'].value_counts()
+
+water_prop_changeD = {
+    'hydrophobic_to_neutral'          : 'change'
+    , 'hydrophobic_to_hydrophobic'    : 'no_change'
+    , 'neutral_to_neutral'            : 'no_change'
+    , 'neutral_to_hydrophobic'        : 'change'
+    , 'hydrophobic_to_hydrophilic'    : 'change'
+    , 'neutral_to_hydrophilic'        : 'change'
+    , 'hydrophilic_to_neutral'        : 'change'
+    , 'hydrophilic_to_hydrophobic'    : 'change'
+    , 'hydrophilic_to_hydrophilic'    : 'no_change'
+}
+
+my_df['water_change'] = my_df['water_change'].map(water_prop_changeD)
+my_df['water_change'].value_counts()
+
+#--------------------
+# Polarity change
+#--------------------
+my_df['polarity_change'] = my_df['wt_prop_polarity'] + str('_to_') + my_df['mut_prop_polarity']
+my_df['polarity_change'].value_counts()
+
+polarity_prop_changeD = {
+    'non-polar_to_non-polar'     : 'no_change'
+    , 'non-polar_to_neutral'     : 'change'  
+    , 'neutral_to_non-polar'     : 'change'  
+    , 'neutral_to_neutral'       : 'no_change'  
+    , 'non-polar_to_basic'       : 'change'  
+    , 'acidic_to_neutral'        : 'change'  
+    , 'basic_to_neutral'         : 'change'  
+    , 'non-polar_to_acidic'      : 'change'  
+    , 'neutral_to_basic'         : 'change'  
+    , 'acidic_to_non-polar'      : 'change'  
+    , 'basic_to_non-polar'       : 'change'
+    , 'neutral_to_acidic'        : 'change'
+    , 'acidic_to_acidic'         : 'no_change'
+    , 'basic_to_acidic'          : 'change'
+    , 'basic_to_basic'           : 'no_change'
+    , 'acidic_to_basic'          : 'change'}
+
+my_df['polarity_change'] = my_df['polarity_change'].map(polarity_prop_changeD)
+my_df['polarity_change'].value_counts()
+
+#--------------------
+# Electrostatics change
+#--------------------
+my_df['electrostatics_change'] = my_df['wt_calcprop'] + str('_to_') + my_df['mut_calcprop']
+my_df['electrostatics_change'].value_counts()
+
+calc_prop_changeD = {
         'non-polar_to_non-polar'     : 'no_change'
-        , 'non-polar_to_neutral'     : 'change'  
-        , 'neutral_to_non-polar'     : 'change'  
-        , 'neutral_to_neutral'       : 'no_change'  
-        , 'non-polar_to_basic'       : 'change'  
-        , 'acidic_to_neutral'        : 'change'  
-        , 'basic_to_neutral'         : 'change'  
-        , 'non-polar_to_acidic'      : 'change'  
-        , 'neutral_to_basic'         : 'change'  
-        , 'acidic_to_non-polar'      : 'change'  
-        , 'basic_to_non-polar'       : 'change'
-        , 'neutral_to_acidic'        : 'change'
-        , 'acidic_to_acidic'         : 'no_change'
-        , 'basic_to_acidic'          : 'change'
-        , 'basic_to_basic'           : 'no_change'
-        , 'acidic_to_basic'          : 'change'}
-    
-    my_df['polarity_change'] = my_df['polarity_change'].map(polarity_prop_changeD)
-    my_df['polarity_change'].value_counts()
-    
-    #--------------------
-    # Electrostatics change
-    #--------------------
-    my_df['electrostatics_change'] = my_df['wt_calcprop'] + str('_to_') + my_df['mut_calcprop']
-    my_df['electrostatics_change'].value_counts()
-    
-    calc_prop_changeD = {
-            'non-polar_to_non-polar'     : 'no_change'
-            , 'non-polar_to_polar'       : 'change'
-            , 'polar_to_non-polar'       : 'change'
-            , 'non-polar_to_pos'         : 'change'
-            , 'neg_to_non-polar'         : 'change'
-            , 'non-polar_to_neg'         : 'change'
-            , 'pos_to_polar'             : 'change'
-            , 'pos_to_non-polar'         : 'change'
-            , 'polar_to_polar'           : 'no_change'
-            , 'neg_to_neg'               : 'no_change'
-            , 'polar_to_neg'             : 'change'
-            , 'pos_to_neg'               : 'change'
-            , 'pos_to_pos'               : 'no_change'
-            , 'polar_to_pos'             : 'change'
-            , 'neg_to_polar'             : 'change'
-            , 'neg_to_pos'               : 'change'
-    }
-    
-    my_df['electrostatics_change'] = my_df['electrostatics_change'].map(calc_prop_changeD)
-    my_df['electrostatics_change'].value_counts()
-    
-    #--------------------    
-    # Summary change: Create a combined column summarising these three cols
-    #--------------------
-    detect_change = 'change'
-    check_prop_cols = ['water_change', 'polarity_change', 'electrostatics_change']
-    #my_df['aa_prop_change'] = (my_df.values == detect_change).any(1).astype(int)
-    my_df['aa_prop_change'] = (my_df[check_prop_cols].values == detect_change).any(1).astype(int)
-    my_df['aa_prop_change'].value_counts()
-    my_df['aa_prop_change'].dtype
-    
-    my_df['aa_prop_change'] = my_df['aa_prop_change'].map({1:'change'
-                                                           , 0: 'no_change'})
-    
-    my_df['aa_prop_change'].value_counts()
-    my_df['aa_prop_change'].dtype
-    
-    #%% IMPUTE values for OR [check script for exploration: UQ_or_imputer]
-    #--------------------
-    # Impute OR values
-    #--------------------
-    #or_cols = ['or_mychisq', 'log10_or_mychisq', 'or_fisher']
-    sel_cols = ['mutationinformation', 'or_mychisq', 'log10_or_mychisq']
-    or_cols = ['or_mychisq', 'log10_or_mychisq']
-    
-    print("count of NULL values before imputation\n")
-    print(my_df[or_cols].isnull().sum())
-    
-    my_dfI = pd.DataFrame(index = my_df['mutationinformation'] )
-    
-        
-    my_dfI = pd.DataFrame(KNN(n_neighbors=3, weights="uniform").fit_transform(my_df[or_cols])
-                          , index =  my_df['mutationinformation']
-                          , columns = or_cols )
-    my_dfI.columns = ['or_rawI', 'logorI']
-    my_dfI.columns
-    my_dfI = my_dfI.reset_index(drop = False) # prevents old index from being added as a column
-    my_dfI.head()
-    print("count of NULL values AFTER imputation\n")
-    print(my_dfI.isnull().sum())
-    
-    #-------------------------------------------
-    # OR df Merge: with original based on index
-    #-------------------------------------------
-    #my_df['index_bm'] = my_df.index
-    mydf_imputed = pd.merge(my_df
-                        , my_dfI
-                        , on = 'mutationinformation')
-    #mydf_imputed = mydf_imputed.set_index(['index_bm'])
-    
-    my_df['log10_or_mychisq'].isna().sum()
-    mydf_imputed['log10_or_mychisq'].isna().sum()
-    mydf_imputed['logorI'].isna().sum() # should be 0
-    
-    len(my_df.columns)
-    len(mydf_imputed.columns)  
-    
-    #-----------------------------------------
-    # REASSIGN my_df after imputing OR values
-    #-----------------------------------------
-    my_df = mydf_imputed.copy()
-    
-    if my_df['logorI'].isna().sum() == 0:
-        print('\nPASS: OR values imputed, data ready for ML')
-    else:
-        sys.exit('\nFAIL: something went wrong, Data not ready for ML. Please check upstream!')
-    
-    #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-    #---------------------------------------
-    # TODO: try other imputation like MICE
-    #---------------------------------------
-    #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-    
-    #%%########################################################################
-    #==========================
-    #     Data for ML
-    #==========================
-    my_df_ml = my_df.copy()
-    
-    #%% Build X: input for ML
-    common_cols_stabiltyN = ['ligand_distance'
-               , 'ligand_affinity_change'
-               , 'duet_stability_change'
-               , 'ddg_foldx'
-               , 'deepddg'
-               , 'ddg_dynamut2'
-               , 'mmcsm_lig'
-               , 'contacts']
-    
-    # Build stability columns ~ gene
-    if gene.lower() in geneL_basic:
-        X_stabilityN = common_cols_stabiltyN
-        cols_to_mask = ['ligand_affinity_change']
-        
-    if gene.lower() in geneL_ppi2:
-    #    X_stabilityN = common_cols_stabiltyN + ['mcsm_ppi2_affinity' , 'interface_dist'] 
-        geneL_ppi2_st_cols = ['mcsm_ppi2_affinity', 'interface_dist'] 
-        X_stabilityN = common_cols_stabiltyN + geneL_ppi2_st_cols
-        cols_to_mask = ['ligand_affinity_change', 'mcsm_ppi2_affinity']
-    
-    if gene.lower() in geneL_na:
-    #    X_stabilityN = common_cols_stabiltyN + ['mcsm_na_affinity'] 
-        geneL_na_st_cols =  ['mcsm_na_affinity'] 
-        X_stabilityN = common_cols_stabiltyN + geneL_na_st_cols
-        cols_to_mask = ['ligand_affinity_change', 'mcsm_na_affinity']
-    
-    if gene.lower() in geneL_na_ppi2:
-    #    X_stabilityN = common_cols_stabiltyN + ['mcsm_na_affinity'] + ['mcsm_ppi2_affinity', 'interface_dist'] 
-        geneL_na_ppi2_st_cols = ['mcsm_na_affinity'] + ['mcsm_ppi2_affinity', 'interface_dist'] 
-        X_stabilityN = common_cols_stabiltyN + geneL_na_ppi2_st_cols
-        cols_to_mask = ['ligand_affinity_change', 'mcsm_na_affinity', 'mcsm_ppi2_affinity']
-    
-    
-    X_foldX_cols = [ 'electro_rr', 'electro_mm', 'electro_sm', 'electro_ss'
-    , 'disulfide_rr', 'disulfide_mm', 'disulfide_sm', 'disulfide_ss'
-    , 'hbonds_rr', 'hbonds_mm', 'hbonds_sm', 'hbonds_ss'
-    , 'partcov_rr', 'partcov_mm', 'partcov_sm', 'partcov_ss'
-    , 'vdwclashes_rr', 'vdwclashes_mm', 'vdwclashes_sm', 'vdwclashes_ss'
-    , 'volumetric_rr', 'volumetric_mm', 'volumetric_ss'
-    ]
-    
-    X_str =  ['rsa'
-               #, 'asa'
-               , 'kd_values'
-               , 'rd_values']    
-    
-    X_ssFN = X_stabilityN + X_str + X_foldX_cols
-    
-    X_evolFN =  ['consurf_score'
-               , 'snap2_score'
-               , 'provean_score']
-        
-    X_genomic_mafor =  ['maf'
-                    , 'logorI'
-                    # , 'or_rawI'
-                    # , 'or_mychisq'
-                    # , 'or_logistic'
-                    # , 'or_fisher'
-                    # , 'pval_fisher'
-                    ]
-    
-    X_genomic_linegae  = ['lineage_proportion'
-                          , 'dist_lineage_proportion'
-                          #, 'lineage' # could be included as a category but it has L2;L4  formatting
-                          , 'lineage_count_all'
-                          , 'lineage_count_unique'
-                          ]
-    
-    X_genomicFN = X_genomic_mafor + X_genomic_linegae
-    
-    X_aaindexFN = list(aa_df_cols)
-    
-    print('\nTotal no. of features for aaindex:', len(X_aaindexFN))
-    
-    # numerical feature names
-    numerical_FN = X_ssFN  + X_evolFN + X_genomicFN + X_aaindexFN
+        , 'non-polar_to_polar'       : 'change'
+        , 'polar_to_non-polar'       : 'change'
+        , 'non-polar_to_pos'         : 'change'
+        , 'neg_to_non-polar'         : 'change'
+        , 'non-polar_to_neg'         : 'change'
+        , 'pos_to_polar'             : 'change'
+        , 'pos_to_non-polar'         : 'change'
+        , 'polar_to_polar'           : 'no_change'
+        , 'neg_to_neg'               : 'no_change'
+        , 'polar_to_neg'             : 'change'
+        , 'pos_to_neg'               : 'change'
+        , 'pos_to_pos'               : 'no_change'
+        , 'polar_to_pos'             : 'change'
+        , 'neg_to_polar'             : 'change'
+        , 'neg_to_pos'               : 'change'
+}
+
+my_df['electrostatics_change'] = my_df['electrostatics_change'].map(calc_prop_changeD)
+my_df['electrostatics_change'].value_counts()
+
+#--------------------    
+# Summary change: Create a combined column summarising these three cols
+#--------------------
+detect_change = 'change'
+check_prop_cols = ['water_change', 'polarity_change', 'electrostatics_change']
+#my_df['aa_prop_change'] = (my_df.values == detect_change).any(1).astype(int)
+my_df['aa_prop_change'] = (my_df[check_prop_cols].values == detect_change).any(1).astype(int)
+my_df['aa_prop_change'].value_counts()
+my_df['aa_prop_change'].dtype
+
+my_df['aa_prop_change'] = my_df['aa_prop_change'].map({1:'change'
+                                                       , 0: 'no_change'})
+
+my_df['aa_prop_change'].value_counts()
+my_df['aa_prop_change'].dtype
+
+#%% IMPUTE values for OR [check script for exploration: UQ_or_imputer]
+#--------------------
+# Impute OR values
+#--------------------
+#or_cols = ['or_mychisq', 'log10_or_mychisq', 'or_fisher']
+sel_cols = ['mutationinformation', 'or_mychisq', 'log10_or_mychisq']
+or_cols = ['or_mychisq', 'log10_or_mychisq']
+
+print("count of NULL values before imputation\n")
+print(my_df[or_cols].isnull().sum())
+
+my_dfI = pd.DataFrame(index = my_df['mutationinformation'] )
 
     
-    # categorical feature names
-    categorical_FN = ['ss_class'
-                # , 'wt_prop_water'
-                # , 'mut_prop_water'
-                # , 'wt_prop_polarity'
-                # , 'mut_prop_polarity'
-                # , 'wt_calcprop'
-                # , 'mut_calcprop'
-                , 'aa_prop_change'
-                , 'electrostatics_change'
-                , 'polarity_change'
-                , 'water_change'
-                , 'drtype_mode_labels' # beware then you can't use it to predict [USED it for uq_v1, not v2]
-                , 'active_site' #[didn't use it for uq_v1]
-                #, 'gene_name' # will be required for the combined stuff
-                 ]
-    #----------------------------------------------
-    # count numerical and categorical features
-    #----------------------------------------------
+my_dfI = pd.DataFrame(KNN(n_neighbors=3, weights="uniform").fit_transform(my_df[or_cols])
+                      , index =  my_df['mutationinformation']
+                      , columns = or_cols )
+my_dfI.columns = ['or_rawI', 'logorI']
+my_dfI.columns
+my_dfI = my_dfI.reset_index(drop = False) # drop=False keeps the old index as a 'mutationinformation' column (the merge key used below)
+my_dfI.head()
+print("count of NULL values AFTER imputation\n")
+print(my_dfI.isnull().sum())
+
+#-------------------------------------------
+# OR df Merge: with original on the 'mutationinformation' column
+#-------------------------------------------
+#my_df['index_bm'] = my_df.index
+mydf_imputed = pd.merge(my_df
+                    , my_dfI
+                    , on = 'mutationinformation')
+#mydf_imputed = mydf_imputed.set_index(['index_bm'])
+
+my_df['log10_or_mychisq'].isna().sum()
+mydf_imputed['log10_or_mychisq'].isna().sum()
+mydf_imputed['logorI'].isna().sum() # should be 0
+
+len(my_df.columns)
+len(mydf_imputed.columns)  
+
+#-----------------------------------------
+# REASSIGN my_df after imputing OR values
+#-----------------------------------------
+my_df = mydf_imputed.copy()
+
+if my_df['logorI'].isna().sum() == 0:
+    print('\nPASS: OR values imputed, data ready for ML')
+else:
+    sys.exit('\nFAIL: something went wrong, Data not ready for ML. Please check upstream!')
+
+#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+#---------------------------------------
+# TODO: try other imputation like MICE
+#---------------------------------------
+#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+#%%########################################################################
+#==========================
+#     Data for ML
+#==========================
+my_df_ml = my_df.copy()
+
+#%% Build X: input for ML
+common_cols_stabiltyN = ['ligand_distance'
+           , 'ligand_affinity_change'
+           , 'duet_stability_change'
+           , 'ddg_foldx'
+           , 'deepddg'
+           , 'ddg_dynamut2'
+           , 'mmcsm_lig'
+           , 'contacts']
+
+# Build stability columns ~ gene
+if gene.lower() in geneL_basic:
+    X_stabilityN = common_cols_stabiltyN
+    cols_to_mask = ['ligand_affinity_change']
     
-    print('\nNo. of numerical features:', len(numerical_FN)
-          , '\nNo. of categorical features:', len(categorical_FN))
+if gene.lower() in geneL_ppi2:
+#    X_stabilityN = common_cols_stabiltyN + ['mcsm_ppi2_affinity' , 'interface_dist'] 
+    geneL_ppi2_st_cols = ['mcsm_ppi2_affinity', 'interface_dist'] 
+    X_stabilityN = common_cols_stabiltyN + geneL_ppi2_st_cols
+    cols_to_mask = ['ligand_affinity_change', 'mcsm_ppi2_affinity']
+
+if gene.lower() in geneL_na:
+#    X_stabilityN = common_cols_stabiltyN + ['mcsm_na_affinity'] 
+    geneL_na_st_cols =  ['mcsm_na_affinity'] 
+    X_stabilityN = common_cols_stabiltyN + geneL_na_st_cols
+    cols_to_mask = ['ligand_affinity_change', 'mcsm_na_affinity']
+
+if gene.lower() in geneL_na_ppi2:
+#    X_stabilityN = common_cols_stabiltyN + ['mcsm_na_affinity'] + ['mcsm_ppi2_affinity', 'interface_dist'] 
+    geneL_na_ppi2_st_cols = ['mcsm_na_affinity'] + ['mcsm_ppi2_affinity', 'interface_dist'] 
+    X_stabilityN = common_cols_stabiltyN + geneL_na_ppi2_st_cols
+    cols_to_mask = ['ligand_affinity_change', 'mcsm_na_affinity', 'mcsm_ppi2_affinity']
+
+
+X_foldX_cols = [ 'electro_rr', 'electro_mm', 'electro_sm', 'electro_ss'
+, 'disulfide_rr', 'disulfide_mm', 'disulfide_sm', 'disulfide_ss'
+, 'hbonds_rr', 'hbonds_mm', 'hbonds_sm', 'hbonds_ss'
+, 'partcov_rr', 'partcov_mm', 'partcov_sm', 'partcov_ss'
+, 'vdwclashes_rr', 'vdwclashes_mm', 'vdwclashes_sm', 'vdwclashes_ss'
+, 'volumetric_rr', 'volumetric_mm', 'volumetric_ss'
+]
+
+X_str =  ['rsa'
+           #, 'asa'
+           , 'kd_values'
+           , 'rd_values']    
+
+X_ssFN = X_stabilityN + X_str + X_foldX_cols
+
+X_evolFN =  ['consurf_score'
+           , 'snap2_score'
+           , 'provean_score']
     
-    ###########################################################################
-    #=======================
-    # Masking columns:
-    # (mCSM-lig, mCSM-NA, mCSM-ppi2) values for lig_dist >10
-    #=======================
-    # my_df_ml['mutationinformation'][my_df['ligand_distance']>10].value_counts()
-    # my_df_ml.groupby('mutationinformation')['ligand_distance'].apply(lambda x: (x>10)).value_counts()
-    
-    # my_df_ml.loc[(my_df_ml['ligand_distance'] > 10), 'ligand_affinity_change'] = 0
-    # (my_df_ml['ligand_affinity_change'] == 0).sum()
-    
-    my_df_ml['mutationinformation'][my_df_ml['ligand_distance']>10].value_counts()
-    my_df_ml.groupby('mutationinformation')['ligand_distance'].apply(lambda x: (x>10)).value_counts()
-    my_df_ml.loc[(my_df_ml['ligand_distance'] > 10), cols_to_mask].value_counts()
-    
-    # mask the mcsm affinity related columns where ligand distance > 10
-    my_df_ml.loc[(my_df_ml['ligand_distance'] > 10), cols_to_mask] = 0
-    (my_df_ml['ligand_affinity_change'] == 0).sum()
-    
-    mask_check = my_df_ml[['mutationinformation', 'ligand_distance'] + cols_to_mask]  
-    
-    # write file for check
-    mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
-    mask_check.to_csv(outdir + 'ml/' + gene.lower() + '_mask_check.csv')
-    
-    #===================================================
-    # Training and BLIND test set: actual vs imputed
-    # dst with actual values  : training set
-    # dst with imputed values : blind test
-    #==================================================
-    my_df_ml[drug].isna().sum()  #'na' ones are the blind_test set
-    
-    blind_test_df = my_df_ml[my_df_ml[drug].isna()]
-    blind_test_df.shape
-    
-    training_df =  my_df_ml[my_df_ml[drug].notna()]
-    training_df.shape
-    
-    # Target 1: dst_mode
-    training_df[drug].value_counts()
-    training_df['dst_mode'].value_counts()
-    ####################################################################
-    #%% extracting dfs based on numerical, categorical column names
-    #----------------------------------
-    # WITHOUT the target var included
-    #----------------------------------
-    num_df = training_df[numerical_FN]
-    num_df.shape
-    
-    cat_df = training_df[categorical_FN]
-    cat_df.shape
-    
-    all_df = training_df[numerical_FN + categorical_FN]
-    all_df.shape
-    
-    #------------------------------
-    # WITH the target var included:
-        #'wtgt': with target
-    #------------------------------
-    # drug and dst_mode should be the same thing
-    num_df_wtgt = training_df[numerical_FN + ['dst_mode']]
-    num_df_wtgt.shape
-    
-    cat_df_wtgt = training_df[categorical_FN + ['dst_mode']]
-    cat_df_wtgt.shape
-    
-    all_df_wtgt = training_df[numerical_FN + categorical_FN + ['dst_mode']]
-    all_df_wtgt.shape
-    #%%########################################################################
-    #============
-    # ML data
-    #============
-    #------
-    # X: Training and Blind test (BTS)
-    #------
-    X     = all_df_wtgt[numerical_FN + categorical_FN] # training data ALL
-    X_bts = blind_test_df[numerical_FN + categorical_FN] # blind test data ALL
-    #X = all_df_wtgt[numerical_FN] # training numerical only
-    #X_bts = blind_test_df[numerical_FN] # blind test data numerical
-    
-    #------
-    # y
-    #------
-    y = all_df_wtgt['dst_mode'] # training data y
-    y_bts = blind_test_df['dst_mode'] # blind data test y
-    
-    #X_bts_wt = blind_test_df[numerical_FN + ['dst_mode']] 
-    
-    # Quick check
-    #(X['ligand_affinity_change']==0).sum() == (X['ligand_distance']>10).sum()
-    for i in range(len(cols_to_mask)):
-        ind = i+1
-        print('\nindex:', i, '\nind:', ind)
-        print('\nMask count check:'
-              , (my_df_ml[cols_to_mask[i]]==0).sum() == (my_df_ml['ligand_distance']>10).sum()
-              )
-    
-    print('Original Data\n', Counter(y)
-          , 'Data dim:', X.shape)
-    
-    ###########################################################################
-    #%% 
-    ###########################################################################
-    #                               RESAMPLING
-    ###########################################################################
-    #------------------------------
-    # Simple Random oversampling
-    # [Numerical + catgeorical]
-    #------------------------------
-    oversample = RandomOverSampler(sampling_strategy='minority')
-    X_ros, y_ros = oversample.fit_resample(X, y)
-    print('Simple Random OverSampling\n', Counter(y_ros))
-    print(X_ros.shape)
-    
-    #------------------------------
-    # Simple Random Undersampling
-    # [Numerical + catgeorical]
-    #------------------------------
-    undersample = RandomUnderSampler(sampling_strategy='majority')
-    X_rus, y_rus = undersample.fit_resample(X, y)
-    print('Simple Random UnderSampling\n', Counter(y_rus))
-    print(X_rus.shape)
-    
-    #------------------------------
-    # Simple combine ROS and RUS
-    # [Numerical + catgeorical]
-    #------------------------------
-    oversample = RandomOverSampler(sampling_strategy='minority')
-    X_ros, y_ros = oversample.fit_resample(X, y)
-    undersample = RandomUnderSampler(sampling_strategy='majority')
-    X_rouC, y_rouC = undersample.fit_resample(X_ros, y_ros)
-    print('Simple Combined Over and UnderSampling\n',  Counter(y_rouC))
-    print(X_rouC.shape)
-    
-    #------------------------------
-    # SMOTE_NC: oversampling 
-    # [numerical + categorical]
-    #https://stackoverflow.com/questions/47655813/oversampling-smote-for-binary-and-categorical-data-in-python
-    #------------------------------
-    # Determine categorical and numerical features
-    numerical_ix = X.select_dtypes(include=['int64', 'float64']).columns
-    numerical_ix
-    num_featuresL = list(numerical_ix)
-    numerical_colind = X.columns.get_indexer(list(numerical_ix) )
-    numerical_colind
-    
-    categorical_ix = X.select_dtypes(include=['object', 'bool']).columns
-    categorical_ix    
-    categorical_colind = X.columns.get_indexer(list(categorical_ix))
-    categorical_colind
-    
-    k_sm = 5 # 5 is deafult
-    sm_nc = SMOTENC(categorical_features=categorical_colind, k_neighbors = k_sm, **rs, **njobs)
-    X_smnc, y_smnc = sm_nc.fit_resample(X, y)
-    print('SMOTE_NC OverSampling\n', Counter(y_smnc))
-    print(X_smnc.shape)
-    globals().update(locals()) # TROLOLOLOLOLOLS
-    #print("i did a horrible hack :-)")
-    ###############################################################################
-    #%% SMOTE RESAMPLING for NUMERICAL ONLY*
-    # #------------------------------
-    # # SMOTE: Oversampling
-    # # [Numerical ONLY]
-    # #------------------------------
-    # k_sm = 1
-    # sm = SMOTE(sampling_strategy = 'auto', k_neighbors = k_sm, **rs)
-    # X_sm, y_sm = sm.fit_resample(X, y)
-    # print(X_sm.shape)
-    # print('SMOTE OverSampling\n', Counter(y_sm))
-    # y_sm_df = y_sm.to_frame()
-    # y_sm_df.value_counts().plot(kind = 'bar')
-    
-    # #------------------------------
-    # # SMOTE: Over + Undersampling COMBINED
-    # # [Numerical ONLY]
-    # #-----------------------------
-    # sm_enn = SMOTEENN(enn=EditedNearestNeighbours(sampling_strategy='all', **rs, **njobs ))
-    # X_enn, y_enn = sm_enn.fit_resample(X, y)
-    # print(X_enn.shape)
-    # print('SMOTE Over+Under Sampling combined\n', Counter(y_enn))
-    
-    ###############################################################################
-    # TODO: Find over and undersampling JUST for categorical data
+X_genomic_mafor =  ['maf'
+                , 'logorI'
+                # , 'or_rawI'
+                # , 'or_mychisq'
+                # , 'or_logistic'
+                # , 'or_fisher'
+                # , 'pval_fisher'
+                ]
+
+X_genomic_linegae  = ['lineage_proportion'
+                      , 'dist_lineage_proportion'
+                      #, 'lineage' # could be included as a category but it has L2;L4  formatting
+                      , 'lineage_count_all'
+                      , 'lineage_count_unique'
+                      ]
+
+X_genomicFN = X_genomic_mafor + X_genomic_linegae
+
+#X_aaindexFN = list(aa_df_cols)
+
+#print('\nTotal no. of features for aaindex:', len(X_aaindexFN))
+
+# numerical feature names [NO aa_index]
+numerical_FN = X_ssFN  + X_evolFN + X_genomicFN
+
+
+# categorical feature names
+categorical_FN = ['ss_class'
+            # , 'wt_prop_water'
+            # , 'mut_prop_water'
+            # , 'wt_prop_polarity'
+            # , 'mut_prop_polarity'
+            # , 'wt_calcprop'
+            # , 'mut_calcprop'
+            , 'aa_prop_change'
+            , 'electrostatics_change'
+            , 'polarity_change'
+            , 'water_change'
+            , 'drtype_mode_labels' # beware then you can't use it to predict [USED it for uq_v1, not v2]
+            , 'active_site' #[didn't use it for uq_v1]
+            #, 'gene_name' # will be required for the combined stuff
+             ]
+#----------------------------------------------
+# count numerical and categorical features
+#----------------------------------------------
+
+print('\nNo. of numerical features:', len(numerical_FN)
+      , '\nNo. of categorical features:', len(categorical_FN))
+
+###########################################################################
+#=======================
+# Masking columns:
+# (mCSM-lig, mCSM-NA, mCSM-ppi2) values for lig_dist >10
+#=======================
+# my_df_ml['mutationinformation'][my_df['ligand_distance']>10].value_counts()
+# my_df_ml.groupby('mutationinformation')['ligand_distance'].apply(lambda x: (x>10)).value_counts()
+
+# my_df_ml.loc[(my_df_ml['ligand_distance'] > 10), 'ligand_affinity_change'] = 0
+# (my_df_ml['ligand_affinity_change'] == 0).sum()
+
+my_df_ml['mutationinformation'][my_df_ml['ligand_distance']>10].value_counts()
+my_df_ml.groupby('mutationinformation')['ligand_distance'].apply(lambda x: (x>10)).value_counts()
+my_df_ml.loc[(my_df_ml['ligand_distance'] > 10), cols_to_mask].value_counts()
+
+# mask the mcsm affinity related columns where ligand distance > 10
+my_df_ml.loc[(my_df_ml['ligand_distance'] > 10), cols_to_mask] = 0
+(my_df_ml['ligand_affinity_change'] == 0).sum()
+
+mask_check = my_df_ml[['mutationinformation', 'ligand_distance'] + cols_to_mask]  
+
+# write file for check
+mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
+mask_check.to_csv(outdir + 'ml/' + gene.lower() + '_mask_check.csv')
+
+#===================================================
+# Training and BLIND test set [UQ]: actual vs imputed
+# No aa index but active_site included
+# dst with actual values  : training set
+# dst with imputed values : blind test
+#==================================================
+my_df_ml[drug].isna().sum()  #'na' ones are the blind_test set
+
+blind_test_df = my_df_ml[my_df_ml[drug].isna()]
+blind_test_df.shape
+
+training_df = my_df_ml[my_df_ml[drug].notna()]
+training_df.shape
+
+# Target 1: dst_mode
+training_df[drug].value_counts()
+training_df['dst_mode'].value_counts()
+####################################################################
+#%% extracting dfs based on numerical, categorical column names
+#----------------------------------
+# WITHOUT the target var included
+#----------------------------------
+num_df = training_df[numerical_FN]
+num_df.shape
+
+cat_df = training_df[categorical_FN]
+cat_df.shape
+
+all_df = training_df[numerical_FN + categorical_FN]
+all_df.shape
+
+#------------------------------
+# WITH the target var included:
+    #'wtgt': with target
+#------------------------------
+# drug and dst_mode should be the same thing
+num_df_wtgt = training_df[numerical_FN + ['dst_mode']]
+num_df_wtgt.shape
+
+cat_df_wtgt = training_df[categorical_FN + ['dst_mode']]
+cat_df_wtgt.shape
+
+all_df_wtgt = training_df[numerical_FN + categorical_FN + ['dst_mode']]
+all_df_wtgt.shape
+#%%########################################################################
+#============
+# ML data
+#============
+#------
+# X: Training and Blind test (BTS)
+#------
+X     = all_df_wtgt[numerical_FN + categorical_FN] # training data ALL
+X_bts = blind_test_df[numerical_FN + categorical_FN] # blind test data ALL
+#X = all_df_wtgt[numerical_FN] # training numerical only
+#X_bts = blind_test_df[numerical_FN] # blind test data numerical
+
+#------
+# y
+#------
+y = all_df_wtgt['dst_mode'] # training data y
+y_bts = blind_test_df['dst_mode'] # blind data test y
+
+#X_bts_wt = blind_test_df[numerical_FN + ['dst_mode']] 
+
+# Quick check
+#(X['ligand_affinity_change']==0).sum() == (X['ligand_distance']>10).sum()
+for i in range(len(cols_to_mask)):
+    ind = i+1
+    print('\nindex:', i, '\nind:', ind)
+    print('\nMask count check:'
+          , (my_df_ml[cols_to_mask[i]]==0).sum() == (my_df_ml['ligand_distance']>10).sum()
+          )
+
+print('Original Data\n', Counter(y)
+      , 'Data dim:', X.shape)
+
+yc1 = Counter(y)
+yc1_ratio = yc1[0]/yc1[1]
+
+yc2 = Counter(y_bts)
+yc2_ratio = yc2[0]/yc2[1]
+
+print('\n-------------------------------------------------------------'
+      , '\nSuccessfully split data: UQ [no aa_index but active site included] training'
+      , '\nactual values: training set'
+      , '\nimputed values: blind test set'
+      , '\nTrain data size:', X.shape
+      , '\nTest data size:', X_bts.shape
+      , '\ny_train numbers:', yc1
+      , '\ny_train ratio:',yc1_ratio
+      , '\n'
+      , '\ny_test_numbers:', yc2
+      , '\ny_test ratio:', yc2_ratio
+      , '\n-------------------------------------------------------------'
+      )
+###########################################################################
+#%% 
+###########################################################################
+#                               RESAMPLING
+###########################################################################
+#------------------------------
+# Simple Random oversampling
+# [Numerical + categorical]
+#------------------------------
+oversample = RandomOverSampler(sampling_strategy='minority')
+X_ros, y_ros = oversample.fit_resample(X, y)
+print('Simple Random OverSampling\n', Counter(y_ros))
+print(X_ros.shape)
+
+#------------------------------
+# Simple Random Undersampling
+# [Numerical + categorical]
+#------------------------------
+undersample = RandomUnderSampler(sampling_strategy='majority')
+X_rus, y_rus = undersample.fit_resample(X, y)
+print('Simple Random UnderSampling\n', Counter(y_rus))
+print(X_rus.shape)
+
+#------------------------------
+# Simple combine ROS and RUS
+# [Numerical + categorical]
+#------------------------------
+oversample = RandomOverSampler(sampling_strategy='minority')
+X_ros, y_ros = oversample.fit_resample(X, y)
+undersample = RandomUnderSampler(sampling_strategy='majority')
+X_rouC, y_rouC = undersample.fit_resample(X_ros, y_ros)
+print('Simple Combined Over and UnderSampling\n',  Counter(y_rouC))
+print(X_rouC.shape)
+
+#------------------------------
+# SMOTE_NC: oversampling 
+# [numerical + categorical]
+#https://stackoverflow.com/questions/47655813/oversampling-smote-for-binary-and-categorical-data-in-python
+#------------------------------
+# Determine categorical and numerical features
+numerical_ix = X.select_dtypes(include=['int64', 'float64']).columns
+numerical_ix
+num_featuresL = list(numerical_ix)
+numerical_colind = X.columns.get_indexer(list(numerical_ix) )
+numerical_colind
+
+categorical_ix = X.select_dtypes(include=['object', 'bool']).columns
+categorical_ix    
+categorical_colind = X.columns.get_indexer(list(categorical_ix))
+categorical_colind
+
+k_sm = 5 # 5 is default
+sm_nc = SMOTENC(categorical_features=categorical_colind, k_neighbors = k_sm, **rs, **njobs)
+X_smnc, y_smnc = sm_nc.fit_resample(X, y)
+print('SMOTE_NC OverSampling\n', Counter(y_smnc))
+print(X_smnc.shape)
+globals().update(locals()) # HACK: copies locals() into globals(); presumably a no-op at module scope -- TODO confirm it is still needed
+#print("i did a horrible hack :-)")
+###############################################################################
+#%% SMOTE RESAMPLING for NUMERICAL ONLY*
+# #------------------------------
+# # SMOTE: Oversampling
+# # [Numerical ONLY]
+# #------------------------------
+# k_sm = 1
+# sm = SMOTE(sampling_strategy = 'auto', k_neighbors = k_sm, **rs)
+# X_sm, y_sm = sm.fit_resample(X, y)
+# print(X_sm.shape)
+# print('SMOTE OverSampling\n', Counter(y_sm))
+# y_sm_df = y_sm.to_frame()
+# y_sm_df.value_counts().plot(kind = 'bar')
+
+# #------------------------------
+# # SMOTE: Over + Undersampling COMBINED
+# # [Numerical ONLY]
+# #-----------------------------
+# sm_enn = SMOTEENN(enn=EditedNearestNeighbours(sampling_strategy='all', **rs, **njobs ))
+# X_enn, y_enn = sm_enn.fit_resample(X, y)
+# print(X_enn.shape)
+# print('SMOTE Over+Under Sampling combined\n', Counter(y_enn))
+
+###############################################################################
+# TODO: Find over and undersampling JUST for categorical data
diff --git a/scripts/ml/ml_data_rt.py b/scripts/ml/ml_data_rt.py
index 5e5a01b..f235517 100644
--- a/scripts/ml/ml_data_rt.py
+++ b/scripts/ml/ml_data_rt.py
@@ -552,18 +552,18 @@ def setvars(gene,drug):
     
     #=================================================
     # Training and BLIND test set: imputed vs actual
-    # BUT in REVERSE i.e 
+    # BUT in REVERSE i.e.
     # dst with actual values  : blind test
     # dst with imputed values : training set
     #==================================================
     my_df_ml[drug].isna().sum()  #'na' ones are now training set
     
+    blind_test_df = my_df_ml[my_df_ml[drug].notna()]
+    blind_test_df.shape
+
     training_df = my_df_ml[my_df_ml[drug].isna()]
     training_df.shape
     
-    blind_test_df =  my_df_ml[my_df_ml[drug].notna()]
-    blind_test_df.shape
-    
     # Target 1: dst_mode
     training_df[drug].value_counts()
     training_df['dst_mode'].value_counts()
diff --git a/scripts/ml/pnca_config.py b/scripts/ml/pnca_config.py
index ecc34f3..9fc27a8 100755
--- a/scripts/ml/pnca_config.py
+++ b/scripts/ml/pnca_config.py
@@ -32,15 +32,36 @@ from ml_data import *
 # TT run all ML clfs: baseline mode
 from MultModelsCl import MultModelsCl
 
-#%%###########################################################################
-
-print('\n#####################################################################\n')
-
-print('TESTING cmd:'
+############################################################################
+print('\n#####################################################################\n'
+      , '\nRunning ML analysis: UQ [without AA  index but with active site annotations]'
       , '\nGene name:', gene
-      , '\nDrug name:', drug
-      , '\nTotal input features:', X.shape
-      , '\n', Counter(y))
+      , '\nDrug name:', drug)
+
+#==================
+# Specify outdir 
+#==================
+
+outdir_ml = outdir + 'ml/uq_v1/'
+
+print('\nOutput directory:', outdir_ml)
+
+#%%###########################################################################
+print('\nSanity checks:'
+      , '\nTotal input features:', len(X.columns)
+      , '\n'
+      , '\nTraining data size:', X.shape
+      , '\nTest data size:', X_bts.shape
+      , '\n'
+      , '\nTarget feature numbers (training data):', Counter(y)
+      , '\nTarget features ratio (training data:', yc1_ratio
+      , '\n'
+      , '\nTarget feature numbers (test data):', Counter(y_bts)
+      , '\nTarget features ratio (test data):', yc2_ratio
+      
+      , '\n\n#####################################################################\n')
+
+print('\n================================================================\n')
 
 print('Strucutral features (n):'
       , len(X_ssFN)
@@ -50,11 +71,11 @@ print('Strucutral features (n):'
       , '\nOther struc columns:', X_str
       , '\n================================================================\n')
 
-print('AAindex features (n):'
-      , len(X_aaindexFN)
-      , '\nThese are:\n'
-      , X_aaindexFN
-      , '\n================================================================\n')
+# print('AAindex features (n):'
+#       , len(X_aaindexFN)
+#       , '\nThese are:\n'
+#       , X_aaindexFN
+#       , '\n================================================================\n')
 
 print('Evolutionary features (n):'
       , len(X_evolFN)
@@ -75,20 +96,15 @@ print('Categorical features (n):'
       , categorical_FN
       , '\n================================================================\n')
 
-if ( len(X.columns) ==  len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
+#if ( len(X.columns) ==  len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
+if ( len(X.columns) ==  len(X_ssFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
     print('\nPass: No. of features match')
 else:
     sys.exit('\nFail: Count of feature mismatch')
 
 print('\n#####################################################################\n')
-################################################################################
-#==================
-# Specify outdir 
-#==================
 
-outdir_ml = outdir + 'ml/v2/'
-
-################################################################################
+###############################################################################
 #==================
 # Baseline models 
 #==================
diff --git a/scripts/ml/rpob_config.py b/scripts/ml/rpob_config.py
index 15a4cec..a995559 100755
--- a/scripts/ml/rpob_config.py
+++ b/scripts/ml/rpob_config.py
@@ -32,15 +32,36 @@ from ml_data import *
 # TT run all ML clfs: baseline mode
 from MultModelsCl import MultModelsCl
 
-#%%###########################################################################
-
-print('\n#####################################################################\n')
-
-print('TESTING cmd:'
+############################################################################
+print('\n#####################################################################\n'
+      , '\nRunning ML analysis: UQ [without AA  index but with active site annotations]'
       , '\nGene name:', gene
-      , '\nDrug name:', drug
-      , '\nTotal input features:', X.shape
-      , '\n', Counter(y))
+      , '\nDrug name:', drug)
+
+#==================
+# Specify outdir 
+#==================
+
+outdir_ml = outdir + 'ml/uq_v1/'
+
+print('\nOutput directory:', outdir_ml)
+
+#%%###########################################################################
+print('\nSanity checks:'
+      , '\nTotal input features:', len(X.columns)
+      , '\n'
+      , '\nTraining data size:', X.shape
+      , '\nTest data size:', X_bts.shape
+      , '\n'
+      , '\nTarget feature numbers (training data):', Counter(y)
+      , '\nTarget features ratio (training data:', yc1_ratio
+      , '\n'
+      , '\nTarget feature numbers (test data):', Counter(y_bts)
+      , '\nTarget features ratio (test data):', yc2_ratio
+      
+      , '\n\n#####################################################################\n')
+
+print('\n================================================================\n')
 
 print('Strucutral features (n):'
       , len(X_ssFN)
@@ -50,11 +71,11 @@ print('Strucutral features (n):'
       , '\nOther struc columns:', X_str
       , '\n================================================================\n')
 
-print('AAindex features (n):'
-      , len(X_aaindexFN)
-      , '\nThese are:\n'
-      , X_aaindexFN
-      , '\n================================================================\n')
+# print('AAindex features (n):'
+#       , len(X_aaindexFN)
+#       , '\nThese are:\n'
+#       , X_aaindexFN
+#       , '\n================================================================\n')
 
 print('Evolutionary features (n):'
       , len(X_evolFN)
@@ -75,20 +96,15 @@ print('Categorical features (n):'
       , categorical_FN
       , '\n================================================================\n')
 
-if ( len(X.columns) ==  len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
+#if ( len(X.columns) ==  len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
+if ( len(X.columns) ==  len(X_ssFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
     print('\nPass: No. of features match')
 else:
     sys.exit('\nFail: Count of feature mismatch')
 
 print('\n#####################################################################\n')
-################################################################################
-#==================
-# Specify outdir 
-#==================
 
-outdir_ml = outdir + 'ml/v2/'
-
-################################################################################
+###############################################################################
 #==================
 # Baseline models 
 #==================
diff --git a/scripts/ml/running_ml_scripts.txt b/scripts/ml/running_ml_scripts.txt
index f9b271a..ae3a8e3 100644
--- a/scripts/ml/running_ml_scripts.txt
+++ b/scripts/ml/running_ml_scripts.txt
@@ -1,49 +1,51 @@
 ==================================
-# BTS data: UQ
-# Features NOT including AA index
-# Date: 30/05/2022, but reran with active_site indication column that was added lateron
-# TODO: put in folder called v1, and UQ in folder called UQ_v0 (will need to rename!)
+# UQ run: same as ORIGINAL except
+# Features NOT including AA index, but includes active site annotations 
+# Date: 30/05/2022, but reran from my desktop on 19/05/2022 to capture log and include active_site indication column
+# captures error: 2>&1
 =================================
-./pnca_config.py
-./embb_config.py 
-./gid_config.py
-./katg_config.py
-./rpob_config.py
+./pnca_config.py 2>&1 | tee log_pnca_config.txt
+./embb_config.py 2>&1 | tee log_embb_config.txt
+./gid_config.py 2>&1 | tee log_gid_config.txt
+./katg_config.py 2>&1 | tee log_katg_config.txt
+./rpob_config.py 2>&1 | tee log_rpob_config.txt
+./alr_config.py 2>&1 | tee log_alr_config.txt 
+
+# alr_config.py: ERROR, as expected, too few values!
 
-## ./alr_config.py = NA
-# ^^^^^^^^ FIXME!
 ########################################################################
 
-==================================
-# BTS data: imputed values
-# All features including AA index
-# Date: 16/05/2022
 =================================
+# Split: ORIGINAL
+# actual values: training set
+# imputed values: blind set
+# All features including AA index
+# Date: 18/05/2022 # reran with minor formatting and log capture
+# captures error: 2>&1
+=================================
+./pnca_orig.py 2>&1 | tee log_pnca_orig.txt
+./embb_orig.py 2>&1 | tee log_embb_orig.txt
+./gid_orig.py 2>&1 | tee log_gid_orig.txt
+./katg_orig.py 2>&1 | tee log_katg_orig.txt
+./rpob_orig.py 2>&1 | tee log_rpob_orig.txt
+./alr_orig.py 2>&1 | tee log_alr_orig.txt # ERROR, as expected, too few values!
 
-./pnca_config.py
-./embb_config.py 
-./gid_config.py
-./katg_config.py
-./rpob_config.py
-
-##./alr_config.py = NA
 ########################################################################
 
 =================================
 # Split: 70/30
 # All features including AA index
-# Date: 17/05/2022 and 18/05/2022
+# Date: 17/05/2022 and 18/05/2022, reran with minor formatting and log capture
 # captures error: 2>$1
 =================================
-
-./pnca_7030.py
-./embb_7030.py 
-# ./gid_7030.py: problems, CT values are non existing except for rouC
+./pnca_7030.py 2>&1 | tee log_pnca_7030.txt
+./embb_7030.py 2>&1 | tee log_embb_7030.txt
 ./gid_7030.py 2>&1 | tee log_gid_7030.txt
-./katg_7030.py
-./rpob_7030.py
+./katg_7030.py 2>&1 | tee log_katg_7030.txt
+./rpob_7030.py 2>&1 | tee log_rpob_7030.txt
+./alr_7030.py 2>&1 | tee log_alr_7030.txt # ERROR, as expected, too few values!
 
-##./alr_7030.py = NA
+# gid_7030.py: problems, CT values are non-existent except for rouC
 ########################################################################
 
 =================================
@@ -57,7 +59,7 @@
 ./gid_8020.py 2>&1 | tee log_gid_8020.txt
 ./katg_8020.py 2>&1 | tee log_katg_8020.txt
 ./rpob_8020.py 2>&1 | tee log_rpob_8020.txt
-./alr_8020.py 2>&1 | tee log_alr_8020.txt
+./alr_8020.py 2>&1 | tee log_alr_8020.txt # ERROR, as expected, too few values!
 
 ########################################################################
 
@@ -72,11 +74,11 @@
 ./gid_sl.py 2>&1 | tee log_gid_sl.txt
 ./katg_sl.py 2>&1 | tee log_katg_sl.txt
 ./rpob_sl.py 2>&1 | tee log_rpob_sl.txt
-./alr_sl.py 2>&1 | tee log_alr_sl.txt
+./alr_sl.py 2>&1 | tee log_alr_sl.txt # ERROR, as expected, too few values!
 
 ########################################################################
 =================================
-# Split: Reverse training
+# Split: REVERSE training
 # imputed values: training set
 # actual values: blind set
 # All features including AA index
@@ -88,7 +90,7 @@
 ./gid_rt.py 2>&1 | tee log_gid_rt.txt
 ./katg_rt.py 2>&1 | tee log_katg_rt.txt
 ./rpob_rt.py 2>&1 | tee log_rpob_rt.txt
-./alr_rt.py 2>&1 | tee log_alr_rt.txt
+./alr_rt.py 2>&1 | tee log_alr_rt.txt # ERROR, as expected, too few values!
 
 ########################################################################
 # COMPLETE Data: actual + na i.e imputed
@@ -105,7 +107,7 @@
 ./gid_cd_7030.py 2>&1 | tee log_gid_cd_7030.txt
 ./katg_cd_7030.py 2>&1 | tee log_katg_cd_7030.txt
 ./rpob_cd_7030.py 2>&1 | tee log_rpob_cd_7030.txt
-./alr_cd_7030.py 2>&1 | tee log_alr_cd_7030.txt
+./alr_cd_7030.py 2>&1 | tee log_alr_cd_7030.txt # ERROR, as expected, too few values!
 
 ########################################################################
 =================================
@@ -119,5 +121,18 @@
 ./gid_cd_8020.py 2>&1 | tee log_gid_cd_8020.txt
 ./katg_cd_8020.py 2>&1 | tee log_katg_cd_8020.txt
 ./rpob_cd_8020.py 2>&1 | tee log_rpob_cd_8020.txt
-./alr_cd_8020.py 2>&1 | tee log_alr_cd_8020.txt
+./alr_cd_8020.py 2>&1 | tee log_alr_cd_8020.txt # ERROR, as expected, too few values!
+
+=================================
+# Split: scaling law [COMPLETE DATA]
+# All features including AA index
+# Date: 18/05/2022
+# captures error: 2>&1
+=================================
+./pnca_cd_sl.py 2>&1 | tee log_pnca_cd_sl.txt
+./embb_cd_sl.py 2>&1 | tee log_embb_cd_sl.txt
+./gid_cd_sl.py 2>&1 | tee log_gid_cd_sl.txt
+./katg_cd_sl.py 2>&1 | tee log_katg_cd_sl.txt
+./rpob_cd_sl.py 2>&1 | tee log_rpob_cd_sl.txt
+./alr_cd_sl.py 2>&1 | tee log_alr_cd_sl.txt # ERROR, as expected, too few values!