25625 lines
1.3 MiB
25625 lines
1.3 MiB
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_7030.py:548: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
|
|
from pandas import MultiIndex, Int64Index
|
|
1.22.4
|
|
1.4.1
|
|
|
|
aaindex_df contains non-numerical data
|
|
|
|
Total no. of non-numerial columns: 2
|
|
|
|
Selecting numerical data only
|
|
|
|
PASS: successfully selected numerical columns only for aaindex_df
|
|
|
|
Now checking for NA in the remaining aaindex_cols
|
|
|
|
Counting aaindex_df cols with NA
|
|
ncols with NA: 4 columns
|
|
Dropping these...
|
|
Original ncols: 127
|
|
|
|
Revised df ncols: 123
|
|
|
|
Checking NA in revised df...
|
|
|
|
PASS: cols with NA successfully dropped from aaindex_df
|
|
Proceeding with combining aa_df with other features_df
|
|
|
|
PASS: ncols match
|
|
Expected ncols: 123
|
|
Got: 123
|
|
|
|
Total no. of columns in clean aa_df: 123
|
|
|
|
Proceeding to merge, expected nrows in merged_df: 531
|
|
|
|
PASS: my_features_df and aa_df successfully combined
|
|
nrows: 531
|
|
ncols: 286
|
|
count of NULL values before imputation
|
|
|
|
or_mychisq 263
|
|
log10_or_mychisq 263
|
|
dtype: int64
|
|
count of NULL values AFTER imputation
|
|
|
|
mutationinformation 0
|
|
or_rawI 0
|
|
logorI 0
|
|
dtype: int64
|
|
|
|
PASS: OR values imputed, data ready for ML
|
|
|
|
Total no. of features for aaindex: 123
|
|
|
|
No. of numerical features: 167
|
|
No. of categorical features: 7
|
|
|
|
PASS: x_features has no target variable
|
|
|
|
No. of columns for x_features: 174
|
|
|
|
-------------------------------------------------------------
|
|
Successfully split data with stratification: 70/30
|
|
Input features data size: (119, 174)
|
|
Train data size: (79, 174)
|
|
Test data size: (40, 174)
|
|
y_train numbers: Counter({0: 50, 1: 29})
|
|
y_train ratio: 1.7241379310344827
|
|
|
|
y_test_numbers: Counter({0: 26, 1: 14})
|
|
y_test ratio: 1.8571428571428572
|
|
-------------------------------------------------------------
|
|
|
|
index: 0
|
|
ind: 1
|
|
|
|
Mask count check: True
|
|
|
|
index: 1
|
|
ind: 2
|
|
|
|
Mask count check: True
|
|
Original Data
|
|
Counter({0: 50, 1: 29}) Data dim: (79, 174)
|
|
|
|
Simple Random OverSampling
|
|
Counter({1: 50, 0: 50})
|
|
(100, 174)
|
|
|
|
Simple Random UnderSampling
|
|
Counter({0: 29, 1: 29})
|
|
(58, 174)
|
|
|
|
Simple Combined Over and UnderSampling
|
|
Counter({0: 50, 1: 50})
|
|
(100, 174)
|
|
|
|
SMOTE_NC OverSampling
|
|
Counter({1: 50, 0: 50})
|
|
(100, 174)
|
|
|
|
#####################################################################
|
|
|
|
Running ML analysis: 70/30 split
|
|
Gene name: gid
|
|
Drug name: streptomycin
|
|
|
|
Output directory: /home/tanu/git/Data/streptomycin/output/ml/tts_7030/
|
|
|
|
Sanity checks:
|
|
Total input features: 174
|
|
|
|
Training data size: (79, 174)
|
|
Test data size: (40, 174)
|
|
|
|
Target feature numbers (training data): Counter({0: 50, 1: 29})
|
|
Target features ratio (training data: 1.7241379310344827
|
|
|
|
Target feature numbers (test data): Counter({0: 26, 1: 14})
|
|
Target features ratio (test data): 1.8571428571428572
|
|
|
|
#####################################################################
|
|
|
|
|
|
================================================================
|
|
|
|
Strucutral features (n): 35
|
|
These are:
|
|
Common stablity features: ['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'mcsm_na_affinity']
|
|
FoldX columns: ['electro_rr', 'electro_mm', 'electro_sm', 'electro_ss', 'disulfide_rr', 'disulfide_mm', 'disulfide_sm', 'disulfide_ss', 'hbonds_rr', 'hbonds_mm', 'hbonds_sm', 'hbonds_ss', 'partcov_rr', 'partcov_mm', 'partcov_sm', 'partcov_ss', 'vdwclashes_rr', 'vdwclashes_mm', 'vdwclashes_sm', 'vdwclashes_ss', 'volumetric_rr', 'volumetric_mm', 'volumetric_ss']
|
|
Other struc columns: ['rsa', 'kd_values', 'rd_values']
|
|
================================================================
|
|
|
|
AAindex features (n): 123
|
|
These are:
|
|
['ALTS910101', 'AZAE970101', 'AZAE970102', 'BASU010101', 'BENS940101', 'BENS940102', 'BENS940103', 'BENS940104', 'BETM990101', 'BLAJ010101', 'BONM030101', 'BONM030102', 'BONM030103', 'BONM030104', 'BONM030105', 'BONM030106', 'BRYS930101', 'CROG050101', 'CSEM940101', 'DAYM780301', 'DAYM780302', 'DOSZ010101', 'DOSZ010102', 'DOSZ010103', 'DOSZ010104', 'FEND850101', 'FITW660101', 'GEOD900101', 'GIAG010101', 'GONG920101', 'GRAR740104', 'HENS920101', 'HENS920102', 'HENS920103', 'HENS920104', 'JOHM930101', 'JOND920103', 'JOND940101', 'KANM000101', 'KAPO950101', 'KESO980101', 'KESO980102', 'KOLA920101', 'KOLA930101', 'KOSJ950100_RSA_SST', 'KOSJ950100_SST', 'KOSJ950110_RSA', 'KOSJ950115', 'LEVJ860101', 'LINK010101', 'LIWA970101', 'LUTR910101', 'LUTR910102', 'LUTR910103', 'LUTR910104', 'LUTR910105', 'LUTR910106', 'LUTR910107', 'LUTR910108', 'LUTR910109', 'MCLA710101', 'MCLA720101', 'MEHP950102', 'MICC010101', 'MIRL960101', 'MIYS850102', 'MIYS850103', 'MIYS930101', 'MIYS960101', 'MIYS960102', 'MIYS960103', 'MIYS990106', 'MIYS990107', 'MIYT790101', 'MOHR870101', 'MOOG990101', 'MUET010101', 'MUET020101', 'MUET020102', 'NAOD960101', 'NGPC000101', 'NIEK910101', 'NIEK910102', 'OGAK980101', 'OVEJ920100_RSA', 'OVEJ920101', 'OVEJ920102', 'OVEJ920103', 'PRLA000101', 'PRLA000102', 'QUIB020101', 'QU_C930101', 'QU_C930102', 'QU_C930103', 'RIER950101', 'RISJ880101', 'RUSR970101', 'RUSR970102', 'RUSR970103', 'SIMK990101', 'SIMK990102', 'SIMK990103', 'SIMK990104', 'SIMK990105', 'SKOJ000101', 'SKOJ000102', 'SKOJ970101', 'TANS760101', 'TANS760102', 'THOP960101', 'TOBD000101', 'TOBD000102', 'TUDE900101', 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106']
|
|
================================================================
|
|
|
|
Evolutionary features (n): 3
|
|
These are:
|
|
['consurf_score', 'snap2_score', 'provean_score']
|
|
================================================================
|
|
|
|
Genomic features (n): 6
|
|
These are:
|
|
['maf', 'logorI']
|
|
['lineage_proportion', 'dist_lineage_proportion', 'lineage_count_all', 'lineage_count_unique']
|
|
================================================================
|
|
|
|
Categorical features (n): 7
|
|
These are:
|
|
['ss_class', 'aa_prop_change', 'electrostatics_change', 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site']
|
|
================================================================
|
|
|
|
|
|
Pass: No. of features match
|
|
|
|
#####################################################################
|
|
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0366919 0.03364944 0.04210711 0.02675176 0.02643323 0.02680922
|
|
0.02558064 0.02712679 0.02658963 0.02430034]
|
|
|
|
mean value: 0.029604005813598632
|
|
|
|
key: score_time
|
|
value: [0.01219821 0.00790191 0.01219344 0.01198339 0.01178885 0.01182723
|
|
0.01177597 0.01183701 0.01194477 0.01177979]
|
|
|
|
mean value: 0.011523056030273437
|
|
|
|
key: test_mcc
|
|
value: [ 0.48795004 nan 0.46666667 0.46666667 0.74535599 0.6
|
|
0.77459667 -0.29277002 0.74535599 0.09128709]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.91067388 0.88152145 0.90865445 0.90865445 0.90865445 0.87863248
|
|
0.84744528 0.87830162 0.87830162 0.91085367]
|
|
|
|
mean value: 0.8911693370709752
|
|
|
|
key: test_accuracy
|
|
value: [0.75 nan 0.75 0.75 0.875 0.75
|
|
0.875 0.5 0.875 0.57142857]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.95774648 0.94366197 0.95774648 0.95774648 0.95774648 0.94366197
|
|
0.92957746 0.94366197 0.94366197 0.95833333]
|
|
|
|
mean value: 0.9493544600938967
|
|
|
|
key: test_fscore
|
|
value: [0.5 nan 0.66666667 0.66666667 0.8 0.75
|
|
0.85714286 0. 0.8 0.4 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.93877551 0.91666667 0.94117647 0.94117647 0.94117647 0.92307692
|
|
0.90196078 0.92 0.92 0.94339623]
|
|
|
|
mean value: 0.9287405522441197
|
|
|
|
key: test_precision
|
|
value: [1. nan 0.66666667 0.66666667 1. 0.6
|
|
0.75 0. 1. 0.33333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.96 0.96 0.96 0.92307692
|
|
0.92 0.95833333 0.95833333 0.96153846]
|
|
|
|
mean value: 0.9601282051282052
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.66666667 0.66666667 0.66666667 1.
|
|
1. 0. 0.66666667 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.88461538 0.84615385 0.92307692 0.92307692 0.92307692 0.92307692
|
|
0.88461538 0.88461538 0.88461538 0.92592593]
|
|
|
|
mean value: 0.9002849002849003
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 nan 0.73333333 0.73333333 0.83333333 0.8
|
|
0.9 0.4 0.83333333 0.55 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.94230769 0.92307692 0.95042735 0.95042735 0.95042735 0.93931624
|
|
0.92008547 0.93119658 0.93119658 0.95185185]
|
|
|
|
mean value: 0.9390313390313391
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 nan 0.5 0.5 0.66666667 0.6
|
|
0.75 0. 0.66666667 0.25 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.88461538 0.84615385 0.88888889 0.88888889 0.88888889 0.85714286
|
|
0.82142857 0.85185185 0.85185185 0.89285714]
|
|
|
|
mean value: 0.8672568172568172
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.61568356 0.59473109 0.68769026 0.86828899 0.86762929 0.69212842
|
|
0.64828324 0.55574918 0.70834208 0.64952207]
|
|
|
|
mean value: 0.688804817199707
|
|
|
|
key: score_time
|
|
value: [0.01196933 0.00655389 0.01198149 0.01488495 0.01500368 0.012532
|
|
0.01223516 0.01520872 0.01664925 0.01557779]
|
|
|
|
mean value: 0.013259625434875489
|
|
|
|
key: test_mcc
|
|
value: [0.1490712 nan 0.46666667 0.46666667 0.46666667 0.6
|
|
0.6 0.1490712 0.46666667 0.54772256]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.82385255 1. 0.96986363 0.9703421 1. 0.93931624
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9703374516508763
|
|
|
|
key: test_accuracy
|
|
value: [0.625 nan 0.75 0.75 0.75 0.75
|
|
0.75 0.625 0.75 0.71428571]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.91549296 1. 0.98591549 0.98591549 1. 0.97183099
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9859154929577465
|
|
|
|
key: test_fscore
|
|
value: [0.4 nan 0.66666667 0.66666667 0.66666667 0.75
|
|
0.75 0.4 0.66666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.86956522 1. 0.98039216 0.98113208 1. 0.96153846
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9792627911264209
|
|
|
|
key: test_precision
|
|
value: [0.5 nan 0.66666667 0.66666667 0.66666667 0.6
|
|
0.6 0.5 0.66666667 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 0.96296296 1. 0.96153846
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9924501424501424
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.66666667 0.66666667 0.66666667 1.
|
|
1. 0.33333333 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.76923077 1. 0.96153846 1. 1. 0.96153846
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9692307692307692
|
|
|
|
key: test_roc_auc
|
|
value: [0.56666667 nan 0.73333333 0.73333333 0.73333333 0.8
|
|
0.8 0.56666667 0.73333333 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.88461538 1. 0.98076923 0.98888889 1. 0.96965812
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9823931623931624
|
|
|
|
key: test_jcc
|
|
value: [0.25 nan 0.5 0.5 0.5 0.6 0.6 0.25 0.5 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.76923077 1. 0.96153846 0.96296296 1. 0.92592593
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.961965811965812
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01205611 0.01126623 0.00906897 0.00851703 0.008286 0.00828934
|
|
0.00834227 0.00843191 0.00838542 0.00838542]
|
|
|
|
mean value: 0.009102869033813476
|
|
|
|
key: score_time
|
|
value: [0.01170921 0.00516486 0.00907874 0.00863242 0.00857592 0.00856733
|
|
0.00852275 0.0085392 0.00850701 0.00861979]
|
|
|
|
mean value: 0.008591723442077637
|
|
|
|
key: test_mcc
|
|
value: [-0.06666667 nan 0.06666667 -0.25819889 0. 0.6
|
|
0.06666667 -0.06666667 -0.46666667 0.54772256]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.63589744 0.3217793 0.4760037 0.61337378 0.48136848 0.59111411
|
|
0.59101806 0.61021596 0.61021596 0.61560271]
|
|
|
|
mean value: 0.554658949505139
|
|
|
|
key: test_accuracy
|
|
value: [0.5 nan 0.5 0.375 0.375 0.75
|
|
0.5 0.5 0.25 0.71428571]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.83098592 0.54929577 0.64788732 0.76056338 0.67605634 0.77464789
|
|
0.76056338 0.77464789 0.77464789 0.77777778]
|
|
|
|
mean value: 0.7327073552425665
|
|
|
|
key: test_fscore
|
|
value: [0.33333333 nan 0.5 0.28571429 0.54545455 0.75
|
|
0.5 0.33333333 0.25 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.76923077 0.6097561 0.67532468 0.75362319 0.68493151 0.75
|
|
0.74626866 0.75757576 0.75757576 0.76470588]
|
|
|
|
mean value: 0.7268992291592407
|
|
|
|
key: test_precision
|
|
value: [0.33333333 nan 0.4 0.25 0.375 0.6
|
|
0.4 0.33333333 0.2 0.5 ]
|
|
|
|
mean value: nan
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
|
|
key: train_precision
|
|
value: [0.76923077 0.44642857 0.50980392 0.60465116 0.53191489 0.63157895
|
|
0.6097561 0.625 0.625 0.63414634]
|
|
|
|
mean value: 0.5987510705028498
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.66666667 0.33333333 1. 1.
|
|
0.66666667 0.33333333 0.33333333 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.76923077 0.96153846 1. 1. 0.96153846 0.92307692
|
|
0.96153846 0.96153846 0.96153846 0.96296296]
|
|
|
|
mean value: 0.9462962962962963
|
|
|
|
key: test_roc_auc
|
|
value: [0.46666667 nan 0.53333333 0.36666667 0.5 0.8
|
|
0.53333333 0.46666667 0.26666667 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.81794872 0.63632479 0.72222222 0.81111111 0.73632479 0.80598291
|
|
0.80299145 0.81410256 0.81410256 0.81481481]
|
|
|
|
mean value: 0.7775925925925926
|
|
|
|
key: test_jcc
|
|
value: [0.2 nan 0.33333333 0.16666667 0.375 0.6
|
|
0.33333333 0.2 0.14285714 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.625 0.43859649 0.50980392 0.60465116 0.52083333 0.6
|
|
0.5952381 0.6097561 0.6097561 0.61904762]
|
|
|
|
mean value: 0.5732682818328394
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.57
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00886774 0.00868058 0.00883889 0.00872326 0.00893021 0.00871801
|
|
0.00855303 0.00864196 0.00846744 0.00878453]
|
|
|
|
mean value: 0.00872056484222412
|
|
|
|
key: score_time
|
|
value: [0.00870299 0.0042479 0.00908852 0.00942707 0.00868988 0.00882626
|
|
0.00883055 0.00864148 0.00859904 0.0089457 ]
|
|
|
|
mean value: 0.00839993953704834
|
|
|
|
key: test_mcc
|
|
value: [-0.29277002 nan 0.1490712 0. 0.46666667 0.25819889
|
|
-0.29277002 -0.29277002 0.1490712 -0.25819889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.50503962 0.50503962 0.56963094 0.56963094 0.4660252 0.532629
|
|
0.53764379 0.53764379 0.4660252 0.621059 ]
|
|
|
|
mean value: 0.5310367097131722
|
|
|
|
key: test_accuracy
|
|
value: [0.5 nan 0.625 0.625 0.75 0.625
|
|
0.5 0.5 0.625 0.57142857]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.77464789 0.77464789 0.8028169 0.8028169 0.76056338 0.78873239
|
|
0.78873239 0.78873239 0.76056338 0.81944444]
|
|
|
|
mean value: 0.7861697965571205
|
|
|
|
key: test_fscore
|
|
value: [0. nan 0.4 0. 0.66666667 0.57142857
|
|
0. 0. 0.4 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.6 0.6 0.66666667 0.66666667 0.58536585 0.65116279
|
|
0.63414634 0.63414634 0.58536585 0.69767442]
|
|
|
|
mean value: 0.6321194932879561
|
|
|
|
key: test_precision
|
|
value: [0. nan 0.5 0. 0.66666667 0.5
|
|
0. 0. 0.5 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.85714286 0.85714286 0.875 0.875 0.8 0.82352941
|
|
0.86666667 0.86666667 0.8 0.9375 ]
|
|
|
|
mean value: 0.8558648459383753
|
|
|
|
key: test_recall
|
|
value: [0. nan 0.33333333 0. 0.66666667 0.66666667
|
|
0. 0. 0.33333333 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.46153846 0.46153846 0.53846154 0.53846154 0.46153846 0.53846154
|
|
0.5 0.5 0.46153846 0.55555556]
|
|
|
|
mean value: 0.5017094017094017
|
|
|
|
key: test_roc_auc
|
|
value: [0.4 nan 0.56666667 0.5 0.73333333 0.63333333
|
|
0.4 0.4 0.56666667 0.4 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.70854701 0.70854701 0.74700855 0.74700855 0.6974359 0.73589744
|
|
0.72777778 0.72777778 0.6974359 0.76666667]
|
|
|
|
mean value: 0.7264102564102564
|
|
|
|
key: test_jcc
|
|
value: [0. nan 0.25 0. 0.5 0.4 0. 0. 0.25 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.42857143 0.42857143 0.5 0.5 0.4137931 0.48275862
|
|
0.46428571 0.46428571 0.4137931 0.53571429]
|
|
|
|
mean value: 0.4631773399014778
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00874114 0.01144648 0.0086658 0.00911021 0.00933123 0.00923896
|
|
0.00934291 0.00915861 0.00966763 0.00907493]
|
|
|
|
mean value: 0.009377789497375489
|
|
|
|
key: score_time
|
|
value: [0.0487206 0.006109 0.01472044 0.01498747 0.0102694 0.01016641
|
|
0.01003814 0.01018739 0.01121283 0.0092566 ]
|
|
|
|
mean value: 0.014566826820373534
|
|
|
|
key: test_mcc
|
|
value: [ 0.48795004 nan 0.46666667 0.48795004 0.46666667 0.25819889
|
|
0.48795004 0. -0.4472136 0.64549722]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.39440661 0.50503962 0.35808137 0.42968701 0.50503962 0.51530373
|
|
0.4660252 0.49787306 0.56963094 0.52098273]
|
|
|
|
mean value: 0.47620698672135525
|
|
|
|
key: test_accuracy
|
|
value: [0.75 nan 0.75 0.75 0.75 0.625
|
|
0.75 0.625 0.375 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.73239437 0.77464789 0.71830986 0.74647887 0.77464789 0.77464789
|
|
0.76056338 0.77464789 0.8028169 0.77777778]
|
|
|
|
mean value: 0.7636932707355243
|
|
|
|
key: test_fscore
|
|
value: [0.5 nan 0.66666667 0.5 0.66666667 0.57142857
|
|
0.5 0. 0. 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.53658537 0.6 0.5 0.57142857 0.6 0.57894737
|
|
0.58536585 0.63636364 0.66666667 0.61904762]
|
|
|
|
mean value: 0.5894405081439741
|
|
|
|
key: test_precision
|
|
value: [1. nan 0.66666667 1. 0.66666667 0.5
|
|
1. 0. 0. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.73333333 0.85714286 0.71428571 0.75 0.85714286 0.91666667
|
|
0.8 0.77777778 0.875 0.86666667]
|
|
|
|
mean value: 0.8148015873015872
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.66666667 0.33333333 0.66666667 0.66666667
|
|
0.33333333 0. 0. 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.42307692 0.46153846 0.38461538 0.46153846 0.46153846 0.42307692
|
|
0.46153846 0.53846154 0.53846154 0.48148148]
|
|
|
|
mean value: 0.4635327635327635
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 nan 0.73333333 0.66666667 0.73333333 0.63333333
|
|
0.66666667 0.5 0.3 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.66709402 0.70854701 0.64786325 0.68632479 0.70854701 0.70042735
|
|
0.6974359 0.72478632 0.74700855 0.71851852]
|
|
|
|
mean value: 0.7006552706552707
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 nan 0.5 0.33333333 0.5 0.4
|
|
0.33333333 0. 0. 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.36666667 0.42857143 0.33333333 0.4 0.42857143 0.40740741
|
|
0.4137931 0.46666667 0.5 0.44827586]
|
|
|
|
mean value: 0.41932858967341724
|
|
|
|
MCC on Blind test: 0.06
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01034451 0.0102098 0.00897241 0.00901365 0.0088799 0.00927591
|
|
0.00998259 0.01031137 0.01042175 0.01003623]
|
|
|
|
mean value: 0.009744811058044433
|
|
|
|
key: score_time
|
|
value: [0.0098021 0.00447941 0.0090816 0.00874925 0.00895143 0.00880194
|
|
0.0096159 0.00961876 0.00949502 0.00887012]
|
|
|
|
mean value: 0.00874655246734619
|
|
|
|
key: test_mcc
|
|
value: [0.48795004 nan 0.48795004 0.48795004 0.48795004 0.74535599
|
|
0. 0. 0.48795004 0.3 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.68088097 0.6228411 0.56330071 0.59331346 0.56330071 0.6228411
|
|
0.56330071 0.68088097 0.6228411 0.66226618]
|
|
|
|
mean value: 0.6175767019079942
|
|
|
|
key: test_accuracy
|
|
value: [0.75 nan 0.75 0.75 0.75 0.875
|
|
0.625 0.625 0.75 0.71428571]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.84507042 0.81690141 0.78873239 0.8028169 0.78873239 0.81690141
|
|
0.78873239 0.84507042 0.81690141 0.83333333]
|
|
|
|
mean value: 0.814319248826291
|
|
|
|
key: test_fscore
|
|
value: [0.5 nan 0.5 0.5 0.5 0.8 0. 0. 0.5 0.5]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.73170732 0.66666667 0.59459459 0.63157895 0.59459459 0.66666667
|
|
0.59459459 0.73170732 0.66666667 0.71428571]
|
|
|
|
mean value: 0.659306307958426
|
|
|
|
key: test_precision
|
|
value: [1. nan 1. 1. 1. 1. 0. 0. 1. 0.5]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.33333333 0.33333333 0.33333333 0.66666667
|
|
0. 0. 0.33333333 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.57692308 0.5 0.42307692 0.46153846 0.42307692 0.5
|
|
0.42307692 0.57692308 0.5 0.55555556]
|
|
|
|
mean value: 0.49401709401709404
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 nan 0.66666667 0.66666667 0.66666667 0.83333333
|
|
0.5 0.5 0.66666667 0.65 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.78846154 0.75 0.71153846 0.73076923 0.71153846 0.75
|
|
0.71153846 0.78846154 0.75 0.77777778]
|
|
|
|
mean value: 0.747008547008547
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 nan 0.33333333 0.33333333 0.33333333 0.66666667
|
|
0. 0. 0.33333333 0.33333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.57692308 0.5 0.42307692 0.46153846 0.42307692 0.5
|
|
0.42307692 0.57692308 0.5 0.55555556]
|
|
|
|
mean value: 0.49401709401709404
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.55261087 0.37978768 0.40832138 0.53674722 0.38702536 0.39668012
|
|
0.45907712 0.38502288 0.56192374 0.36470985]
|
|
|
|
mean value: 0.4431906223297119
|
|
|
|
key: score_time
|
|
value: [0.0121274 0.00678778 0.01212931 0.01215839 0.01212978 0.01212168
|
|
0.01210904 0.01213956 0.01217985 0.01214075]
|
|
|
|
mean value: 0.011602354049682618
|
|
|
|
key: test_mcc
|
|
value: [ 0.48795004 nan 0.46666667 0.25819889 0.46666667 0.6
|
|
0.77459667 -0.29277002 0.46666667 -0.09128709]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.75 nan 0.75 0.625 0.75 0.75
|
|
0.875 0.5 0.75 0.42857143]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.5 nan 0.66666667 0.57142857 0.66666667 0.75
|
|
0.85714286 0. 0.66666667 0.33333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. nan 0.66666667 0.5 0.66666667 0.6
|
|
0.75 0. 0.66666667 0.25 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.66666667 0.66666667 0.66666667 1.
|
|
1. 0. 0.66666667 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[0.66666667 nan 0.73333333 0.63333333 0.73333333 0.8
|
|
0.9 0.4 0.73333333 0.45 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 nan 0.5 0.4 0.5 0.6
|
|
0.75 0. 0.5 0.2 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0133059 0.01309037 0.01105928 0.00985241 0.00974655 0.00949502
|
|
0.00951028 0.00971913 0.00915694 0.00968456]
|
|
|
|
mean value: 0.010462045669555664
|
|
|
|
key: score_time
|
|
value: [0.01178765 0.00480032 0.00885653 0.00857639 0.00867677 0.00849724
|
|
0.00837517 0.00841808 0.0084424 0.00845408]
|
|
|
|
mean value: 0.00848846435546875
|
|
|
|
key: test_mcc
|
|
value: [0.25819889 nan 0.77459667 0.77459667 1. 0.6
|
|
0.74535599 0.74535599 0.74535599 0.54772256]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.625 nan 0.875 0.875 1. 0.75
|
|
0.875 0.875 0.875 0.71428571]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 nan 0.85714286 0.85714286 1. 0.75
|
|
0.8 0.8 0.8 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5 nan 0.75 0.75 1. 0.6 1. 1. 1. 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 nan 1. 1. 1. 1.
|
|
0.66666667 0.66666667 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.63333333 nan 0.9 0.9 1. 0.8
|
|
0.83333333 0.83333333 0.83333333 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.4 nan 0.75 0.75 1. 0.6
|
|
0.66666667 0.66666667 0.66666667 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.89
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.07962322 0.07938647 0.0790019 0.07999277 0.07923889 0.0796628
|
|
0.0828433 0.08033228 0.08008289 0.08019423]
|
|
|
|
mean value: 0.0800358772277832
|
|
|
|
key: score_time
|
|
value: [0.0167439 0.00441599 0.01676273 0.01670766 0.01685452 0.01676798
|
|
0.01670623 0.0169127 0.01742005 0.01756716]
|
|
|
|
mean value: 0.015685892105102538
|
|
|
|
key: test_mcc
|
|
value: [ 0.48795004 nan 0.74535599 -0.06666667 0.48795004 0.6
|
|
0.74535599 -0.29277002 0.1490712 0.3 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.75 nan 0.875 0.5 0.75 0.75
|
|
0.875 0.5 0.625 0.71428571]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.5 nan 0.8 0.33333333 0.5 0.75
|
|
0.8 0. 0.4 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. nan 1. 0.33333333 1. 0.6
|
|
1. 0. 0.5 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.66666667 0.33333333 0.33333333 1.
|
|
0.66666667 0. 0.33333333 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 nan 0.83333333 0.46666667 0.66666667 0.8
|
|
0.83333333 0.4 0.56666667 0.65 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 nan 0.66666667 0.2 0.33333333 0.6
|
|
0.66666667 0. 0.25 0.33333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00855803 0.00837731 0.0085175 0.00922489 0.00838661 0.0093317
|
|
0.00823355 0.00841951 0.00875092 0.00846076]
|
|
|
|
mean value: 0.008626079559326172
|
|
|
|
key: score_time
|
|
value: [0.0088222 0.00437546 0.00849724 0.00917506 0.00854778 0.00906491
|
|
0.00846219 0.0085783 0.00849533 0.00848794]
|
|
|
|
mean value: 0.00825064182281494
|
|
|
|
key: test_mcc
|
|
value: [ 0.1490712 nan 0.46666667 -0.06666667 0.1490712 1.
|
|
0.77459667 -0.4472136 0.46666667 -0.25819889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.625 nan 0.75 0.5 0.625 1.
|
|
0.875 0.375 0.75 0.57142857]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.4 nan 0.66666667 0.33333333 0.4 1.
|
|
0.85714286 0. 0.66666667 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5 nan 0.66666667 0.33333333 0.5 1.
|
|
0.75 0. 0.66666667 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.66666667 0.33333333 0.33333333 1.
|
|
1. 0. 0.66666667 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.56666667 nan 0.73333333 0.46666667 0.56666667 1.
|
|
0.9 0.3 0.73333333 0.4 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.25 nan 0.5 0.2 0.25 1. 0.75 0. 0.5 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42) /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.99221253 1.00797582 1.01229548 1.01305676 0.99885559 0.99018335
|
|
0.99298692 0.98534179 0.99022436 0.99618149]
|
|
|
|
mean value: 0.9979314088821412
|
|
|
|
key: score_time
|
|
value: [0.09150147 0.00473714 0.09475374 0.14177513 0.09324384 0.088516
|
|
0.09004092 0.0887692 0.08675432 0.09039307]
|
|
|
|
mean value: 0.08704848289489746
|
|
|
|
key: test_mcc
|
|
value: [ 0.48795004 nan 0.74535599 0.25819889 0.48795004 0.77459667
|
|
0.74535599 -0.29277002 0.48795004 0.73029674]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.75 nan 0.875 0.625 0.75 0.875
|
|
0.875 0.5 0.75 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.5 nan 0.8 0.57142857 0.5 0.85714286
|
|
0.8 0. 0.5 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. nan 1. 0.5 1. 0.75
|
|
1. 0. 1. 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.66666667 0.66666667 0.33333333 1.
|
|
0.66666667 0. 0.33333333 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 nan 0.83333333 0.63333333 0.66666667 0.9
|
|
0.83333333 0.4 0.66666667 0.9 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 nan 0.66666667 0.4 0.33333333 0.75
|
|
0.66666667 0. 0.33333333 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.52
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
|
|
key: fit_time
|
|
value: [1.71230912 0.82662535 0.84278703 0.86609745 0.93222427 0.88134384
|
|
0.90477657 0.81548691 0.85592294 0.81256628]
|
|
|
|
mean value: 0.9450139760971069
|
|
|
|
key: score_time
|
|
value: [0.17363358 0.00458622 0.17959046 0.21361065 0.21203494 0.17785597
|
|
0.22314191 0.22879744 0.1943121 0.22552323]
|
|
|
|
mean value: 0.18330864906311034
|
|
|
|
key: test_mcc
|
|
value: [0.48795004 nan 0.46666667 0.48795004 0.48795004 0.74535599
|
|
0.74535599 0.48795004 0.48795004 0.73029674]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.88152145 0.91067388 0.94010481 0.96986363 0.94010481 0.96986363
|
|
0.94010481 0.94010481 0.94010481 0.97058178]
|
|
|
|
mean value: 0.9403028409607979
|
|
|
|
key: test_accuracy
|
|
value: [0.75 nan 0.75 0.75 0.75 0.875
|
|
0.875 0.75 0.75 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.94366197 0.95774648 0.97183099 0.98591549 0.97183099 0.98591549
|
|
0.97183099 0.97183099 0.97183099 0.98611111]
|
|
|
|
mean value: 0.9718505477308295
|
|
|
|
key: test_fscore
|
|
value: [0.5 nan 0.66666667 0.5 0.5 0.8
|
|
0.8 0.5 0.5 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.91666667 0.93877551 0.96 0.98039216 0.96 0.98039216
|
|
0.96 0.96 0.96 0.98113208]
|
|
|
|
mean value: 0.9597358566067937
|
|
|
|
key: test_precision
|
|
value: [1. nan 0.66666667 1. 1. 1.
|
|
1. 1. 1. 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.66666667 0.33333333 0.33333333 0.66666667
|
|
0.66666667 0.33333333 0.33333333 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.84615385 0.88461538 0.92307692 0.96153846 0.92307692 0.96153846
|
|
0.92307692 0.92307692 0.92307692 0.96296296]
|
|
|
|
mean value: 0.9232193732193732
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 nan 0.73333333 0.66666667 0.66666667 0.83333333
|
|
0.83333333 0.66666667 0.66666667 0.9 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.92307692 0.94230769 0.96153846 0.98076923 0.96153846 0.98076923
|
|
0.96153846 0.96153846 0.96153846 0.98148148]
|
|
|
|
mean value: 0.9616096866096866
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 nan 0.5 0.33333333 0.33333333 0.66666667
|
|
0.66666667 0.33333333 0.33333333 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.84615385 0.88461538 0.92307692 0.96153846 0.92307692 0.96153846
|
|
0.92307692 0.92307692 0.92307692 0.96296296]
|
|
|
|
mean value: 0.9232193732193732
|
|
|
|
MCC on Blind test: 0.63
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00989461 0.00995517 0.01037693 0.00869536 0.00957036 0.00846767
|
|
0.00956583 0.00920486 0.00919366 0.00914788]
|
|
|
|
mean value: 0.009407234191894532
|
|
|
|
key: score_time
|
|
value: [0.00942397 0.00489521 0.00996447 0.0094893 0.00896859 0.00871682
|
|
0.00867128 0.00852704 0.00943971 0.00916314]
|
|
|
|
mean value: 0.008725953102111817
|
|
|
|
key: test_mcc
|
|
value: [-0.29277002 nan 0.1490712 0. 0.46666667 0.25819889
|
|
-0.29277002 -0.29277002 0.1490712 -0.25819889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.50503962 0.50503962 0.56963094 0.56963094 0.4660252 0.532629
|
|
0.53764379 0.53764379 0.4660252 0.621059 ]
|
|
|
|
mean value: 0.5310367097131722
|
|
|
|
key: test_accuracy
|
|
value: [0.5 nan 0.625 0.625 0.75 0.625
|
|
0.5 0.5 0.625 0.57142857]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.77464789 0.77464789 0.8028169 0.8028169 0.76056338 0.78873239
|
|
0.78873239 0.78873239 0.76056338 0.81944444]
|
|
|
|
mean value: 0.7861697965571205
|
|
|
|
key: test_fscore
|
|
value: [0. nan 0.4 0. 0.66666667 0.57142857
|
|
0. 0. 0.4 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.6 0.6 0.66666667 0.66666667 0.58536585 0.65116279
|
|
0.63414634 0.63414634 0.58536585 0.69767442]
|
|
|
|
mean value: 0.6321194932879561
|
|
|
|
key: test_precision
|
|
value: [0. nan 0.5 0. 0.66666667 0.5
|
|
0. 0. 0.5 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.85714286 0.85714286 0.875 0.875 0.8 0.82352941
|
|
0.86666667 0.86666667 0.8 0.9375 ]
|
|
|
|
mean value: 0.8558648459383753
|
|
|
|
key: test_recall
|
|
value: [0. nan 0.33333333 0. 0.66666667 0.66666667
|
|
0. 0. 0.33333333 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.46153846 0.46153846 0.53846154 0.53846154 0.46153846 0.53846154
|
|
0.5 0.5 0.46153846 0.55555556]
|
|
|
|
mean value: 0.5017094017094017
|
|
|
|
key: test_roc_auc
|
|
value: [0.4 nan 0.56666667 0.5 0.73333333 0.63333333
|
|
0.4 0.4 0.56666667 0.4 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.70854701 0.70854701 0.74700855 0.74700855 0.6974359 0.73589744
|
|
0.72777778 0.72777778 0.6974359 0.76666667]
|
|
|
|
mean value: 0.7264102564102564
|
|
|
|
key: test_jcc
|
|
value: [0. nan 0.25 0. 0.5 0.4 0. 0. 0.25 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
|
|
value: [0.42857143 0.42857143 0.5 0.5 0.4137931 0.48275862
|
|
0.46428571 0.46428571 0.4137931 0.53571429]
|
|
|
|
mean value: 0.4631773399014778
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.14565539 0.03244257 0.05331469 0.03653431 0.03764868 0.03769803
|
|
0.0387702 0.03797412 0.03629756 0.06974673]
|
|
|
|
mean value: 0.05260822772979736
|
|
|
|
key: score_time
|
|
value: [0.01081181 0.00494266 0.01059127 0.01046586 0.01142001 0.01110435
|
|
0.01143312 0.01069999 0.01111579 0.01013684]
|
|
|
|
mean value: 0.01027216911315918
|
|
|
|
key: test_mcc
|
|
value: [0.74535599 nan 0.74535599 1. 1. 0.6
|
|
0.74535599 1. 0.74535599 0.73029674]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 nan 0.875 1. 1. 0.75
|
|
0.875 1. 0.875 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 nan 0.8 1. 1. 0.75 0.8 1. 0.8 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. nan 1. 1. 1. 0.6
|
|
1. 1. 1. 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 nan 0.66666667 1. 1. 1.
|
|
0.66666667 1. 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 nan 0.83333333 1. 1. 0.8
|
|
0.83333333 1. 0.83333333 0.9 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 nan 0.66666667 1. 1. 0.6
|
|
0.66666667 1. 0.66666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.89
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02565622 0.01628876 0.04023051 0.03998733 0.04027724 0.03976345
|
|
0.03319478 0.03937459 0.03932738 0.03963566]
|
|
|
|
mean value: 0.035373592376708986
|
|
|
|
key: score_time
|
|
value: [0.01197481 0.00604296 0.02297831 0.0204556 0.02054095 0.02182841
|
|
0.02347851 0.02111363 0.02293038 0.02010679]
|
|
|
|
mean value: 0.01914503574371338
|
|
|
|
key: test_mcc
|
|
value: [ 0.77459667 nan 0.77459667 0.25819889 0.1490712 0.25819889
|
|
0.74535599 -0.06666667 0.06666667 0.3 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.96986363 0.96986363 1. 0.96986363 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9909590875629278
|
|
|
|
key: test_accuracy
|
|
value: [0.875 nan 0.875 0.625 0.625 0.625
|
|
0.875 0.5 0.5 0.71428571]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.98591549 0.98591549 1. 0.98591549 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.995774647887324
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 nan 0.85714286 0.57142857 0.4 0.57142857
|
|
0.8 0.33333333 0.5 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.98039216 0.98039216 1. 0.98039216 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9941176470588236
|
|
|
|
key: test_precision
|
|
value: [0.75 nan 0.75 0.5 0.5 0.5
|
|
1. 0.33333333 0.4 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. nan 1. 0.66666667 0.33333333 0.66666667
|
|
0.66666667 0.33333333 0.66666667 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.96153846 0.96153846 1. 0.96153846 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9884615384615385
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 nan 0.9 0.63333333 0.56666667 0.63333333
|
|
0.83333333 0.46666667 0.53333333 0.65 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.98076923 0.98076923 1. 0.98076923 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9942307692307693
|
|
|
|
key: test_jcc
|
|
value: [0.75 nan 0.75 0.4 0.25 0.4
|
|
0.66666667 0.2 0.33333333 0.33333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.96153846 0.96153846 1. 0.96153846 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9884615384615385
|
|
|
|
MCC on Blind test: -0.1
|
|
|
|
Accuracy on Blind test: 0.48
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01452279 0.00926256 0.01047921 0.00929594 0.00924563 0.00948405
|
|
0.00887465 0.00973344 0.00907183 0.00911689]
|
|
|
|
mean value: 0.009908699989318847
|
|
|
|
key: score_time
|
|
value: [0.00940919 0.00476885 0.00952935 0.00888324 0.00898743 0.00851965
|
|
0.00895286 0.00930977 0.00942397 0.00866628]
|
|
|
|
mean value: 0.008645057678222656
|
|
|
|
key: test_mcc
|
|
value: [-0.06666667 nan 0.74535599 0.1490712 0.74535599 0.48795004
|
|
0.46666667 0.1490712 0.48795004 0.3 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.5638813 0.56963094 0.59712276 0.66078849 0.59712276 0.59831714
|
|
0.62887412 0.56542032 0.56928569 0.57594697]
|
|
|
|
mean value: 0.5926390492106871
|
|
|
|
key: test_accuracy
|
|
value: [0.5 nan 0.875 0.625 0.875 0.75
|
|
0.75 0.625 0.75 0.71428571]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.8028169 0.8028169 0.81690141 0.84507042 0.81690141 0.81690141
|
|
0.83098592 0.8028169 0.8028169 0.80555556]
|
|
|
|
mean value: 0.8143583724569641
|
|
|
|
key: test_fscore
|
|
value: [0.33333333 nan 0.8 0.4 0.8 0.5
|
|
0.66666667 0.4 0.5 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.69565217 0.66666667 0.71111111 0.7755102 0.71111111 0.73469388
|
|
0.73913043 0.70833333 0.72 0.70833333]
|
|
|
|
mean value: 0.7170542245883861
|
|
|
|
key: test_precision
|
|
value: [0.33333333 nan 1. 0.5 1. 1.
|
|
0.66666667 0.5 1. 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.8 0.875 0.84210526 0.82608696 0.84210526 0.7826087
|
|
0.85 0.77272727 0.75 0.80952381]
|
|
|
|
mean value: 0.8150157260740785
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.66666667 0.33333333 0.66666667 0.33333333
|
|
0.66666667 0.33333333 0.33333333 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.61538462 0.53846154 0.61538462 0.73076923 0.61538462 0.69230769
|
|
0.65384615 0.65384615 0.69230769 0.62962963]
|
|
|
|
mean value: 0.6437321937321937
|
|
|
|
key: test_roc_auc
|
|
value: [0.46666667 nan 0.83333333 0.56666667 0.83333333 0.66666667
|
|
0.73333333 0.56666667 0.66666667 0.65 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.76324786 0.74700855 0.77435897 0.82094017 0.77435897 0.79059829
|
|
0.79358974 0.77136752 0.77948718 0.77037037]
|
|
|
|
mean value: 0.7785327635327636
|
|
|
|
key: test_jcc
|
|
value: [0.2 nan 0.66666667 0.25 0.66666667 0.33333333
|
|
0.5 0.25 0.33333333 0.33333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.53333333 0.5 0.55172414 0.63333333 0.55172414 0.58064516
|
|
0.5862069 0.5483871 0.5625 0.5483871 ]
|
|
|
|
mean value: 0.5596241193919169
|
|
|
|
MCC on Blind test: 0.03
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01042056 0.01261353 0.01291299 0.0122683 0.01264381 0.01341867
|
|
0.01380587 0.02929306 0.03145814 0.01436043]
|
|
|
|
mean value: 0.016319537162780763
|
|
|
|
key: score_time
|
|
value: [0.00941873 0.00572133 0.01120472 0.01170421 0.01149988 0.0115881
|
|
0.01157069 0.01993561 0.01752448 0.01183438]
|
|
|
|
mean value: 0.012200212478637696
|
|
|
|
key: test_mcc
|
|
value: [ 0.48795004 nan 0.46666667 0.25819889 0.46666667 0.29277002
|
|
0.77459667 -0.29277002 0.46666667 0.54772256]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.91067388 0.91067388 0.94196687 0.90865445 0.91067388 0.88861107
|
|
0.9703421 0.90865445 0.94196687 0.97100831]
|
|
|
|
mean value: 0.9263225781397904
|
|
|
|
key: test_accuracy
|
|
value: [0.75 nan 0.75 0.625 0.75 0.5
|
|
0.875 0.5 0.75 0.71428571]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.95774648 0.95774648 0.97183099 0.95774648 0.95774648 0.94366197
|
|
0.98591549 0.95774648 0.97183099 0.98611111]
|
|
|
|
mean value: 0.9648082942097026
|
|
|
|
key: test_fscore
|
|
value: [0.5 nan 0.66666667 0.57142857 0.66666667 0.6
|
|
0.85714286 0. 0.66666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.93877551 0.93877551 0.96296296 0.94117647 0.93877551 0.92857143
|
|
0.98113208 0.94117647 0.96296296 0.98181818]
|
|
|
|
mean value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
0.9516127083575949
|
|
|
|
key: test_precision
|
|
value: [1. nan 0.66666667 0.5 0.66666667 0.42857143
|
|
0.75 0. 0.66666667 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.92857143 0.96 1. 0.86666667
|
|
0.96296296 0.96 0.92857143 0.96428571]
|
|
|
|
mean value: 0.9571058201058201
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.66666667 0.66666667 0.66666667 1.
|
|
1. 0. 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.88461538 0.88461538 1. 0.92307692 0.88461538 1.
|
|
1. 0.92307692 1. 1. ]
|
|
|
|
mean value: 0.95
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 nan 0.73333333 0.63333333 0.73333333 0.6
|
|
0.9 0.4 0.73333333 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.94230769 0.94230769 0.97777778 0.95042735 0.94230769 0.95555556
|
|
0.98888889 0.95042735 0.97777778 0.98888889]
|
|
|
|
mean value: 0.9616666666666667
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 nan 0.5 0.4 0.5 0.42857143
|
|
0.75 0. 0.5 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.88461538 0.88461538 0.92857143 0.88888889 0.88461538 0.86666667
|
|
0.96296296 0.88888889 0.92857143 0.96428571]
|
|
|
|
mean value: 0.9082682132682133
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01315737 0.01237726 0.01252127 0.01203179 0.01235271 0.01230645
|
|
0.01251531 0.01241851 0.01265669 0.01256299]
|
|
|
|
mean value: 0.012490034103393555
|
|
|
|
key: score_time
|
|
value: [0.01045203 0.00614882 0.01162457 0.01147652 0.01155424 0.0219295
|
|
0.01169086 0.01164365 0.01157975 0.01150036]
|
|
|
|
mean value: 0.011960029602050781
|
|
|
|
key: test_mcc
|
|
value: [ 0.48795004 nan 0.06666667 0.46666667 0.46666667 0.77459667
|
|
0.74535599 -0.29277002 0.25819889 0.54772256]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 0.9703421 0.86343483 0.81649844 0.79294273 0.82385255
|
|
0.70957488 0.78854022 0.79294273 1. ]
|
|
|
|
mean value: 0.8558128479158662
|
|
|
|
key: test_accuracy
|
|
value: [0.75 nan 0.5 0.75 0.75 0.875
|
|
0.875 0.5 0.625 0.71428571]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.98591549 0.92957746 0.91549296 0.88732394 0.91549296
|
|
0.85915493 0.90140845 0.88732394 1. ]
|
|
|
|
mean value: 0.928169014084507
|
|
|
|
key: test_fscore
|
|
value: [0.5 nan 0.5 0.66666667 0.66666667 0.85714286
|
|
0.8 0. 0.57142857 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 0.98113208 0.9122807 0.88 0.86666667 0.86956522
|
|
0.76190476 0.85106383 0.86666667 1. ]
|
|
|
|
mean value: 0.8989279919642718
|
|
|
|
key: test_precision
|
|
value: [1. nan 0.4 0.66666667 0.66666667 0.75
|
|
1. 0. 0.5 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 0.96296296 0.83870968 0.91666667 0.76470588 1.
|
|
1. 0.95238095 0.76470588 1. ]
|
|
|
|
mean value: 0.9200132024135819
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.66666667 0.66666667 0.66666667 1.
|
|
0.66666667 0. 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 0.84615385 1. 0.76923077
|
|
0.61538462 0.76923077 1. 1. ]
|
|
|
|
mean value: 0.9
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 nan 0.53333333 0.73333333 0.73333333 0.9
|
|
0.83333333 0.4 0.63333333 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.98888889 0.94444444 0.9008547 0.91111111 0.88461538
|
|
0.80769231 0.87350427 0.91111111 1. ]
|
|
|
|
mean value: 0.9222222222222222
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 nan 0.33333333 0.5 0.5 0.75
|
|
0.66666667 0. 0.4 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 0.96296296 0.83870968 0.78571429 0.76470588 0.76923077
|
|
0.61538462 0.74074074 0.76470588 1. ]
|
|
|
|
mean value: 0.8242154816158611
|
|
|
|
MCC on Blind test: -0.11
|
|
|
|
Accuracy on Blind test: 0.42
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08493304 0.08175111 0.08369136 0.08218789 0.08108163 0.08307648
|
|
0.08458972 0.08156514 0.08213234 0.08229351]
|
|
|
|
mean value: 0.08273022174835205
|
|
|
|
key: score_time
|
|
value: [0.01478672 0.00491738 0.01560259 0.01490855 0.015692 0.0162847
|
|
0.01612735 0.0150218 0.01616716 0.01500249]
|
|
|
|
mean value: 0.014451074600219726
|
|
|
|
key: test_mcc
|
|
value: [0.74535599 nan 0.25819889 1. 1. 0.6
|
|
1. 1. 0.74535599 0.54772256]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 nan 0.625 1. 1. 0.75
|
|
1. 1. 0.875 0.71428571]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 nan 0.57142857 1. 1. 0.75
|
|
1. 1. 0.8 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. nan 0.5 1. 1. 0.6 1. 1. 1. 0.5]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 nan 0.66666667 1. 1. 1.
|
|
1. 1. 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 nan 0.63333333 1. 1. 0.8
|
|
1. 1. 0.83333333 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 nan 0.4 1. 1. 0.6
|
|
1. 1. 0.66666667 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.72
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03491879 0.02965927 0.05070114 0.04325414 0.03710151 0.04708791
|
|
0.04051805 0.05239248 0.0487051 0.02523303]
|
|
|
|
mean value: 0.04095714092254639
|
|
|
|
key: score_time
|
|
value: [0.02097654 0.00495124 0.03698468 0.01774836 0.03910136 0.03681517
|
|
0.02154684 0.03762245 0.01965308 0.01705694]
|
|
|
|
mean value: 0.02524566650390625
|
|
|
|
key: test_mcc
|
|
value: [0.74535599 nan 0.74535599 1. 1. 0.6
|
|
0.74535599 1. 0.74535599 0.73029674]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.96986363 0.96986363 1. 0.96986363 0.94010481 1.
|
|
1. 1. 0.96986363 1. ]
|
|
|
|
mean value: 0.9819559310958537
|
|
|
|
key: test_accuracy
|
|
value: [0.875 nan 0.875 1. 1. 0.75
|
|
0.875 1. 0.875 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.98591549 0.98591549 1. 0.98591549 0.97183099 1.
|
|
1. 1. 0.98591549 1. ]
|
|
|
|
mean value: 0.9915492957746479
|
|
|
|
key: test_fscore
|
|
value: [0.8 nan 0.8 1. 1. 0.75 0.8 1. 0.8 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.98039216 0.98039216 1. 0.98039216 0.96 1.
|
|
1. 1. 0.98039216 1. ]
|
|
|
|
mean value: 0.9881568627450981
|
|
|
|
key: test_precision
|
|
value: [1. nan 1. 1. 1. 0.6
|
|
1. 1. 1. 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 nan 0.66666667 1. 1. 1.
|
|
0.66666667 1. 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.96153846 0.96153846 1. 0.96153846 0.92307692 1.
|
|
1. 1. 0.96153846 1. ]
|
|
|
|
mean value: 0.9769230769230769
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 nan 0.83333333 1. 1. 0.8
|
|
0.83333333 1. 0.83333333 0.9 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.98076923 0.98076923 1. 0.98076923 0.96153846 1.
|
|
1. 1. 0.98076923 1. ]
|
|
|
|
mean value: 0.9884615384615385
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 nan 0.66666667 1. 1. 0.6
|
|
0.66666667 1. 0.66666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.96153846 0.96153846 1. 0.96153846 0.92307692 1.
|
|
1. 1. 0.96153846 1. ]
|
|
|
|
mean value: 0.9769230769230769
|
|
|
|
MCC on Blind test: 0.95
|
|
|
|
Accuracy on Blind test: 0.98
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0122931 0.01473594 0.01660824 0.01521087 0.0151813 0.01513147
|
|
0.02177501 0.01516676 0.01508951 0.01509905]
|
|
|
|
mean value: 0.015629124641418458
|
|
|
|
key: score_time
|
|
value: [0.01127362 0.00562072 0.01160932 0.01198483 0.01171732 0.01169324
|
|
0.01196361 0.01168847 0.01174283 0.01177049]
|
|
|
|
mean value: 0.011106443405151368
|
|
|
|
key: test_mcc
|
|
value: [ 0.1490712 nan 0.74535599 -0.4472136 0.48795004 0.77459667
|
|
0.1490712 -0.29277002 0.48795004 0.73029674]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.625 nan 0.875 0.375 0.75 0.875
|
|
0.625 0.5 0.75 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.4 nan 0.8 0. 0.5 0.85714286
|
|
0.4 0. 0.5 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5 nan 1. 0. 1. 0.75
|
|
0.5 0. 1. 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.66666667 0. 0.33333333 1.
|
|
0.33333333 0. 0.33333333 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.56666667 nan 0.83333333 0.3 0.66666667 0.9
|
|
0.56666667 0.4 0.66666667 0.9 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.25 nan 0.66666667 0. 0.33333333 0.75
|
|
0.25 0. 0.33333333 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.18
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.16718197 0.15094709 0.1711874 0.15357113 0.15574336 0.15652514
|
|
0.15600491 0.15759158 0.1278398 0.17194462]
|
|
|
|
mean value: 0.15685369968414306
|
|
|
|
key: score_time
|
|
value: [0.00918221 0.00462985 0.00915265 0.00921178 0.00939751 0.00919819
|
|
0.00938702 0.00930691 0.00974512 0.00909448]
|
|
|
|
mean value: 0.008830571174621582
|
|
|
|
key: test_mcc
|
|
value: [0.74535599 nan 0.46666667 0.77459667 1. 0.6
|
|
0.74535599 1. 0.74535599 0.73029674]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 nan 0.75 0.875 1. 0.75
|
|
0.875 1. 0.875 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 nan 0.66666667 0.85714286 1. 0.75
|
|
0.8 1. 0.8 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. nan 0.66666667 0.75 1. 0.6
|
|
1. 1. 1. 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 nan 0.66666667 1. 1. 1.
|
|
0.66666667 1. 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 nan 0.73333333 0.9 1. 0.8
|
|
0.83333333 1. 0.83333333 0.9 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 nan 0.5 0.75 1. 0.6
|
|
0.66666667 1. 0.66666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.89
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01070952 0.01322937 0.01341176 0.02545214 0.02427244 0.02728081
|
|
0.0287056 0.0138135 0.0139854 0.01462626]
|
|
|
|
mean value: 0.01854867935180664
|
|
|
|
key: score_time
|
|
value: [0.01166081 0.00594354 0.01179552 0.02356005 0.02031541 0.0121429
|
|
0.01230502 0.0118804 0.01562524 0.02522063]
|
|
|
|
mean value: 0.015044951438903808
|
|
|
|
key: test_mcc
|
|
value: [-0.46666667 nan -0.29277002 -0.29277002 -0.25819889 0.1490712
|
|
-0.46666667 -0.29277002 -0.4472136 -0.25819889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.25 nan 0.5 0.5 0.375 0.625
|
|
0.25 0.5 0.375 0.57142857]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.25 nan 0. 0. 0.28571429 0.4
|
|
0.25 0. 0. 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.2 nan 0. 0. 0.25 0.5 0.2 0. 0. 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0. 0. 0.33333333 0.33333333
|
|
0.33333333 0. 0. 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.26666667 nan 0.4 0.4 0.36666667 0.56666667
|
|
0.26666667 0.4 0.3 0.4 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.14285714 nan 0. 0. 0.16666667 0.25
|
|
0.14285714 0. 0. 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.14
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02903557 0.03166127 0.02882695 0.03186989 0.03176379 0.03159523
|
|
0.03179622 0.03586578 0.03634501 0.0317626 ]
|
|
|
|
mean value: 0.032052230834960935
|
|
|
|
key: score_time
|
|
value: [0.0200243 0.01208067 0.01757097 0.01424813 0.02132797 0.01160336
|
|
0.02168918 0.02083015 0.02316499 0.0211401 ]
|
|
|
|
mean value: 0.018367981910705565
|
|
|
|
key: test_mcc
|
|
value: [0.74535599 nan 0.46666667 0.6 0.74535599 0.6
|
|
0.77459667 0.1490712 0.46666667 0.54772256]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 0.9703421 0.9703421 0.9703421 0.9703421 1.
|
|
0.9703421 0.9703421 1. ]
|
|
|
|
mean value: 0.9822052584466778
|
|
|
|
key: test_accuracy
|
|
value: [0.875 nan 0.75 0.75 0.875 0.75
|
|
0.875 0.625 0.75 0.71428571]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 0.98591549 0.98591549 0.98591549 0.98591549
|
|
1. 0.98591549 0.98591549 1. ]
|
|
|
|
mean value: 0.9915492957746479
|
|
|
|
key: test_fscore
|
|
value: [0.8 nan 0.66666667 0.75 0.8 0.75
|
|
0.85714286 0.4 0.66666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 0.98113208 0.98113208 0.98113208 0.98113208
|
|
1. 0.98113208 0.98113208 1. ]
|
|
|
|
mean value: 0.9886792452830189
|
|
|
|
key: test_precision
|
|
value: [1. nan 0.66666667 0.6 1. 0.6
|
|
0.75 0.5 0.66666667 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.96296296 0.96296296 0.96296296 0.96296296
|
|
1. 0.96296296 0.96296296 1. ]
|
|
|
|
mean value: 0.9777777777777777
|
|
|
|
key: test_recall
|
|
value: [0.66666667 nan 0.66666667 1. 0.66666667 1.
|
|
1. 0.33333333 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 nan 0.73333333 0.8 0.83333333 0.8
|
|
0.9 0.56666667 0.73333333 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:115: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:118: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[1. 1. 0.98888889 0.98888889 0.98888889 0.98888889
|
|
1. 0.98888889 0.98888889 1. ]
|
|
|
|
mean value: 0.9933333333333333
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 nan 0.5 0.6 0.66666667 0.6
|
|
0.75 0.25 0.5 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 0.96296296 0.96296296 0.96296296 0.96296296
|
|
1. 0.96296296 0.96296296 1. ]
|
|
|
|
mean value: 0.9777777777777777
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.17595625 0.17720246 0.23368216 0.2604897 0.20024538 0.22788954
|
|
0.17445135 0.18081164 0.20982456 0.23728633]
|
|
|
|
mean value: 0.20778393745422363
|
|
|
|
key: score_time
|
|
value: [0.0230782 0.01266408 0.01375008 0.02251482 0.01994252 0.0219214
|
|
0.0211935 0.02238035 0.02076578 0.01844668]
|
|
|
|
mean value: 0.019665741920471193
|
|
|
|
key: test_mcc
|
|
value: [0.74535599 nan 0.46666667 0.6 0.74535599 0.6
|
|
0.77459667 0.1490712 0.46666667 0.54772256]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 0.9703421 0.9703421 0.9703421 0.9703421 1.
|
|
0.9703421 0.9703421 1. ]
|
|
|
|
mean value: 0.9822052584466778
|
|
|
|
key: test_accuracy
|
|
value: [0.875 nan 0.75 0.75 0.875 0.75
|
|
0.875 0.625 0.75 0.71428571]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 0.98591549 0.98591549 0.98591549 0.98591549
|
|
1. 0.98591549 0.98591549 1. ]
|
|
|
|
mean value: 0.9915492957746479
|
|
|
|
key: test_fscore
|
|
value: [0.8 nan 0.66666667 0.75 0.8 0.75
|
|
0.85714286 0.4 0.66666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 0.98113208 0.98113208 0.98113208 0.98113208
|
|
1. 0.98113208 0.98113208 1. ]
|
|
|
|
mean value: 0.9886792452830189
|
|
|
|
key: test_precision
|
|
value: [1. nan 0.66666667 0.6 1. 0.6
|
|
0.75 0.5 0.66666667 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.96296296 0.96296296 0.96296296 0.96296296
|
|
1. 0.96296296 0.96296296 1. ]
|
|
|
|
mean value: 0.9777777777777777
|
|
|
|
key: test_recall
|
|
value: [0.66666667 nan 0.66666667 1. 0.66666667 1.
|
|
1. 0.33333333 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 nan 0.73333333 0.8 0.83333333 0.8
|
|
0.9 0.56666667 0.73333333 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 0.98888889 0.98888889 0.98888889 0.98888889
|
|
1. 0.98888889 0.98888889 1. ]
|
|
|
|
mean value: 0.9933333333333333
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 nan 0.5 0.6 0.66666667 0.6
|
|
0.75 0.25 0.5 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 0.96296296 0.96296296 0.96296296 0.96296296
|
|
1. 0.96296296 0.96296296 1. ]
|
|
|
|
mean value: 0.9777777777777777
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02724361 0.02601719 0.02918768 0.02592826 0.02982807 0.02673316
|
|
0.02467227 0.02559614 0.02310491 0.02411938]
|
|
|
|
mean value: 0.026243066787719725
|
|
|
|
key: score_time
|
|
value: [0.0119884 0.00643563 0.01028681 0.01191902 0.01183963 0.01192975
|
|
0.01201987 0.0121851 0.01160169 0.0118041 ]
|
|
|
|
mean value: 0.011201000213623047
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 nan nan 0.81649658 0.6 0.81649658
|
|
0.65465367 0.81649658 0.21821789 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.91111111 0.91111111 0.88910845 0.91111111 0.91111111 0.91111111
|
|
0.93356387 0.88910845 0.91111111 0.88910845]
|
|
|
|
mean value: 0.9067555884909202
|
|
|
|
key: test_accuracy
|
|
value: [0.8 nan nan 0.9 0.8 0.9 0.8 0.9 0.6 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.95555556 0.95555556 0.94444444 0.95555556 0.95555556 0.95555556
|
|
0.96666667 0.94444444 0.95555556 0.94444444]
|
|
|
|
mean value: 0.9533333333333334
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 nan nan 0.90909091 0.8 0.88888889
|
|
0.75 0.90909091 0.66666667 0.90909091]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.95555556 0.95555556 0.94505495 0.95555556 0.95555556 0.95555556
|
|
0.96703297 0.94505495 0.95555556 0.94505495]
|
|
|
|
mean value: 0.9535531135531136
|
|
|
|
key: test_precision
|
|
value: [0.71428571 nan nan 0.83333333 0.8 1.
|
|
1. 0.83333333 0.57142857 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.95555556 0.95555556 0.93478261 0.95555556 0.95555556 0.95555556
|
|
0.95652174 0.93478261 0.95555556 0.93478261]
|
|
|
|
mean value: 0.9494202898550725
|
|
|
|
key: test_recall
|
|
value: [1. nan nan 1. 0.8 0.8 0.6 1. 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.95555556 0.95555556 0.95555556 0.95555556 0.95555556 0.95555556
|
|
0.97777778 0.95555556 0.95555556 0.95555556]
|
|
|
|
mean value: 0.9577777777777778
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 nan nan 0.9 0.8 0.9 0.8 0.9 0.6 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.95555556 0.95555556 0.94444444 0.95555556 0.95555556 0.95555556
|
|
0.96666667 0.94444444 0.95555556 0.94444444]
|
|
|
|
mean value: 0.9533333333333335
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 nan nan 0.83333333 0.66666667 0.8
|
|
0.6 0.83333333 0.5 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.91489362 0.91489362 0.89583333 0.91489362 0.91489362 0.91489362
|
|
0.93617021 0.89583333 0.91489362 0.89583333]
|
|
|
|
mean value: 0.9113031914893617
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.59747982 0.64376473 0.71673417 0.69791865 0.47388649 1.05191803
|
|
0.63061428 0.58212113 0.71103501 0.60764217]
|
|
|
|
mean value: 0.6713114500045776
|
|
|
|
key: score_time
|
|
value: [0.01522326 0.0066607 0.00642395 0.01201344 0.01186037 0.01513076
|
|
0.01198053 0.01196361 0.01532221 0.01208282]
|
|
|
|
mean value: 0.011866164207458497
|
|
|
|
key: test_mcc
|
|
value: [0.6 nan nan 0.81649658 0.40824829 0.81649658
|
|
0.65465367 0.81649658 0.40824829 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 0.93356387 0.95555556 0.97801929 0.73333333 1.
|
|
0.93356387 0.97801929 1. 0.95650071]
|
|
|
|
mean value: 0.9468555933963895
|
|
|
|
key: test_accuracy
|
|
value: [0.8 nan nan 0.9 0.7 0.9 0.8 0.9 0.7 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.96666667 0.97777778 0.98888889 0.86666667 1.
|
|
0.96666667 0.98888889 1. 0.97777778]
|
|
|
|
mean value: 0.9733333333333334
|
|
|
|
key: test_fscore
|
|
value: [0.8 nan nan 0.90909091 0.66666667 0.88888889
|
|
0.75 0.90909091 0.72727273 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 0.96703297 0.97777778 0.98901099 0.86666667 1.
|
|
0.96703297 0.98901099 1. 0.97826087]
|
|
|
|
mean value: 0.9734793226097574
|
|
|
|
key: test_precision
|
|
value: [0.8 nan nan 0.83333333 0.75 1.
|
|
1. 0.83333333 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 0.95652174 0.97777778 0.97826087 0.86666667 1.
|
|
0.95652174 0.97826087 1. 0.95744681]
|
|
|
|
mean value: 0.9671456470346387
|
|
|
|
key: test_recall
|
|
value: [0.8 nan nan 1. 0.6 0.8 0.6 1. 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 0.97777778 0.97777778 1. 0.86666667 1.
|
|
0.97777778 1. 1. 1. ]
|
|
|
|
mean value: 0.98
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 nan nan 0.9 0.7 0.9 0.8 0.9 0.7 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.96666667 0.97777778 0.98888889 0.86666667 1.
|
|
0.96666667 0.98888889 1. 0.97777778]
|
|
|
|
mean value: 0.9733333333333334
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 nan nan 0.83333333 0.5 0.8
|
|
0.6 0.83333333 0.57142857 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 0.93617021 0.95652174 0.97826087 0.76470588 1.
|
|
0.93617021 0.97826087 1. 0.95744681]
|
|
|
|
mean value: 0.9507536594656364
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0123117 0.00990081 0.00964355 0.0083921 0.00831342 0.00837278
|
|
0.00830507 0.00861096 0.00842547 0.0087893 ]
|
|
|
|
mean value: 0.00910651683807373
|
|
|
|
key: score_time
|
|
value: [0.02768016 0.0048759 0.00427985 0.00843048 0.00844884 0.0084765
|
|
0.00845861 0.00852132 0.00856638 0.00871158]
|
|
|
|
mean value: 0.0096449613571167
|
|
|
|
key: test_mcc
|
|
value: [-0.33333333 nan nan 0.2 0.5 0.21821789
|
|
0.40824829 0.5 0.21821789 0.33333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.46537892 0.44444444 0.70004007 0.73994007 0.67809594 0.641948
|
|
0.78478493 0.641948 0.82548988 0.65996633]
|
|
|
|
mean value: 0.6582036578842267
|
|
|
|
key: test_accuracy
|
|
value: [0.4 nan nan 0.6 0.7 0.6 0.7 0.7 0.6 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.7 0.67777778 0.84444444 0.86666667 0.82222222 0.8
|
|
0.88888889 0.8 0.91111111 0.81111111]
|
|
|
|
mean value: 0.8122222222222222
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 nan nan 0.6 0.76923077 0.66666667
|
|
0.66666667 0.76923077 0.66666667 0.71428571]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.76106195 0.75213675 0.85714286 0.875 0.84615385 0.83018868
|
|
0.89583333 0.83018868 0.90697674 0.83809524]
|
|
|
|
mean value: 0.8392778076441294
|
|
|
|
key: test_precision
|
|
value: [0.44444444 nan nan 0.6 0.625 0.57142857
|
|
0.75 0.625 0.57142857 0.55555556]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.63235294 0.61111111 0.79245283 0.82352941 0.74576271 0.72131148
|
|
0.84313725 0.72131148 0.95121951 0.73333333]
|
|
|
|
mean value: 0.7575522057355462
|
|
|
|
key: test_recall
|
|
value: [0.8 nan nan 0.6 1. 0.8 0.6 1. 0.8 1. ]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.95555556 0.97777778 0.93333333 0.93333333 0.97777778 0.97777778
|
|
0.95555556 0.97777778 0.86666667 0.97777778]
|
|
|
|
mean value: 0.9533333333333334
|
|
|
|
key: test_roc_auc
|
|
value: [0.4 nan nan 0.6 0.7 0.6 0.7 0.7 0.6 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.7 0.67777778 0.84444444 0.86666667 0.82222222 0.8
|
|
0.88888889 0.8 0.91111111 0.81111111]
|
|
|
|
mean value: 0.8122222222222223
|
|
|
|
key: test_jcc
|
|
value: [0.4 nan nan 0.42857143 0.625 0.5
|
|
0.5 0.625 0.5 0.55555556]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.61428571 0.60273973 0.75 0.77777778 0.73333333 0.70967742
|
|
0.81132075 0.70967742 0.82978723 0.72131148]
|
|
|
|
mean value: 0.7259910854303271
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00870204 0.00929213 0.00878859 0.00928044 0.00863266 0.00860143
|
|
0.00866652 0.00958157 0.0096643 0.00961232]
|
|
|
|
mean value: 0.009082198143005371
|
|
|
|
key: score_time
|
|
value: [0.00865149 0.00441241 0.00431967 0.00865722 0.00870681 0.00857425
|
|
0.00904512 0.00933361 0.00930762 0.00936174]
|
|
|
|
mean value: 0.008036994934082031
|
|
|
|
key: test_mcc
|
|
value: [0.21821789 nan nan 0.21821789 0.6 0.40824829
|
|
0.40824829 0.65465367 0. 0.40824829]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.73624773 0.71269665 0.67082039 0.8001976 0.76026311 0.75724019
|
|
0.73333333 0.68957028 0.8001976 0.69162666]
|
|
|
|
mean value: 0.7352193554584638
|
|
|
|
key: test_accuracy
|
|
value: [0.6 nan nan 0.6 0.8 0.7 0.7 0.8 0.5 0.7]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.86666667 0.85555556 0.83333333 0.9 0.87777778 0.87777778
|
|
0.86666667 0.84444444 0.9 0.84444444]
|
|
|
|
mean value: 0.8666666666666667
|
|
|
|
key: test_fscore
|
|
value: [0.5 nan nan 0.66666667 0.8 0.66666667
|
|
0.66666667 0.83333333 0.54545455 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.87234043 0.86021505 0.84210526 0.9010989 0.88421053 0.88172043
|
|
0.86666667 0.84782609 0.8988764 0.85106383]
|
|
|
|
mean value: 0.8706123587880272
|
|
|
|
key: test_precision
|
|
value: [0.66666667 nan nan 0.57142857 0.8 0.75
|
|
0.75 0.71428571 0.5 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.83673469 0.83333333 0.8 0.89130435 0.84 0.85416667
|
|
0.86666667 0.82978723 0.90909091 0.81632653]
|
|
|
|
mean value: 0.8477410382116012
|
|
|
|
key: test_recall
|
|
value: [0.4 nan nan 0.8 0.8 0.6 0.6 1. 0.6 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.91111111 0.88888889 0.88888889 0.91111111 0.93333333 0.91111111
|
|
0.86666667 0.86666667 0.88888889 0.88888889]
|
|
|
|
mean value: 0.8955555555555555
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 nan nan 0.6 0.8 0.7 0.7 0.8 0.5 0.7]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.86666667 0.85555556 0.83333333 0.9 0.87777778 0.87777778
|
|
0.86666667 0.84444444 0.9 0.84444444]
|
|
|
|
mean value: 0.8666666666666667
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 nan nan 0.5 0.66666667 0.5
|
|
0.5 0.71428571 0.375 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.77358491 0.75471698 0.72727273 0.82 0.79245283 0.78846154
|
|
0.76470588 0.73584906 0.81632653 0.74074074]
|
|
|
|
mean value: 0.7714111193025098
|
|
|
|
MCC on Blind test: 0.03
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00962067 0.00819492 0.00826192 0.00823355 0.00825858 0.0082972
|
|
0.00824308 0.0086062 0.0086453 0.00830889]
|
|
|
|
mean value: 0.00846703052520752
|
|
|
|
key: score_time
|
|
value: [0.0149827 0.00423265 0.00422812 0.00957608 0.00942659 0.0144031
|
|
0.00948668 0.00979161 0.01269317 0.00941229]
|
|
|
|
mean value: 0.009823298454284668
|
|
|
|
key: test_mcc
|
|
value: [0.2 nan nan 0.5 0.5 0.
|
|
0.21821789 0.65465367 0. 0.6 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.69162666 0.76486616 0.62609903 0.68888889 0.67488191 0.69509522
|
|
0.64700558 0.73624773 0.76026311 0.69509522]
|
|
|
|
mean value: 0.6980069521372378
|
|
|
|
key: test_accuracy
|
|
value: [0.6 nan nan 0.7 0.7 0.5 0.6 0.8 0.5 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.84444444 0.87777778 0.81111111 0.84444444 0.83333333 0.84444444
|
|
0.82222222 0.86666667 0.87777778 0.84444444]
|
|
|
|
mean value: 0.8466666666666667
|
|
|
|
key: test_fscore
|
|
value: [0.6 nan nan 0.76923077 0.76923077 0.54545455
|
|
0.66666667 0.83333333 0.61538462 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.85106383 0.88659794 0.82105263 0.84444444 0.84536082 0.85416667
|
|
0.82978723 0.87234043 0.88421053 0.85416667]
|
|
|
|
mean value: 0.8543191187920814
|
|
|
|
key: test_precision
|
|
value: [0.6 nan nan 0.625 0.625 0.5
|
|
0.57142857 0.71428571 0.5 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.81632653 0.82692308 0.78 0.84444444 0.78846154 0.80392157
|
|
0.79591837 0.83673469 0.84 0.80392157]
|
|
|
|
mean value: 0.8136651788920697
|
|
|
|
key: test_recall
|
|
value: [0.6 nan nan 1. 1. 0.6 0.8 1. 0.8 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.88888889 0.95555556 0.86666667 0.84444444 0.91111111 0.91111111
|
|
0.86666667 0.91111111 0.93333333 0.91111111]
|
|
|
|
mean value: 0.9
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 nan nan 0.7 0.7 0.5 0.6 0.8 0.5 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.84444444 0.87777778 0.81111111 0.84444444 0.83333333 0.84444444
|
|
0.82222222 0.86666667 0.87777778 0.84444444]
|
|
|
|
mean value: 0.8466666666666667
|
|
|
|
key: test_jcc
|
|
value: [0.42857143 nan nan 0.625 0.625 0.375
|
|
0.5 0.71428571 0.44444444 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.74074074 0.7962963 0.69642857 0.73076923 0.73214286 0.74545455
|
|
0.70909091 0.77358491 0.79245283 0.74545455]
|
|
|
|
mean value: 0.7462415432226753
|
|
|
|
MCC on Blind test: -0.04
|
|
|
|
Accuracy on Blind test: 0.48
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00986791 0.01017141 0.00918722 0.00919867 0.0091598 0.00916481
|
|
0.00927687 0.00930643 0.00905871 0.00913 ]
|
|
|
|
mean value: 0.00935218334197998
|
|
|
|
key: score_time
|
|
value: [0.00940585 0.0044322 0.00426841 0.00933194 0.0090754 0.00920653
|
|
0.0087769 0.0087471 0.0087831 0.00867248]
|
|
|
|
mean value: 0.008069992065429688
|
|
|
|
key: test_mcc
|
|
value: [0.2 nan nan 0.81649658 0.65465367 0.81649658
|
|
0.40824829 0.5 0.21821789 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.82548988 0.84465303 0.84465303 0.8230355 0.88910845 0.86666667
|
|
0.86666667 0.86666667 0.89087081 0.82222222]
|
|
|
|
mean value: 0.8540032913422
|
|
|
|
key: test_accuracy
|
|
value: [0.6 nan nan 0.9 0.8 0.9 0.7 0.7 0.6 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.91111111 0.92222222 0.92222222 0.91111111 0.94444444 0.93333333
|
|
0.93333333 0.93333333 0.94444444 0.91111111]
|
|
|
|
mean value: 0.9266666666666666
|
|
|
|
key: test_fscore
|
|
value: [0.6 nan nan 0.90909091 0.83333333 0.88888889
|
|
0.66666667 0.76923077 0.66666667 0.88888889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.90697674 0.92307692 0.92307692 0.90909091 0.94505495 0.93333333
|
|
0.93333333 0.93333333 0.94252874 0.91111111]
|
|
|
|
mean value: 0.9260916291229042
|
|
|
|
key: test_precision
|
|
value: [0.6 nan nan 0.83333333 0.71428571 1.
|
|
0.75 0.625 0.57142857 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.95121951 0.91304348 0.91304348 0.93023256 0.93478261 0.93333333
|
|
0.93333333 0.93333333 0.97619048 0.91111111]
|
|
|
|
mean value: 0.9329623222853636
|
|
|
|
key: test_recall
|
|
value: [0.6 nan nan 1. 1. 0.8 0.6 1. 0.8 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.86666667 0.93333333 0.93333333 0.88888889 0.95555556 0.93333333
|
|
0.93333333 0.93333333 0.91111111 0.91111111]
|
|
|
|
mean value: 0.92
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 nan nan 0.9 0.8 0.9 0.7 0.7 0.6 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.91111111 0.92222222 0.92222222 0.91111111 0.94444444 0.93333333
|
|
0.93333333 0.93333333 0.94444444 0.91111111]
|
|
|
|
mean value: 0.9266666666666666
|
|
|
|
key: test_jcc
|
|
value: [0.42857143 nan nan 0.83333333 0.71428571 0.8
|
|
0.5 0.625 0.5 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.82978723 0.85714286 0.85714286 0.83333333 0.89583333 0.875
|
|
0.875 0.875 0.89130435 0.83673469]
|
|
|
|
mean value: 0.8626278656698572
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.37450528 0.35082269 0.39461231 0.50876474 0.3729012 0.39715028
|
|
0.39133334 0.37829828 0.58005452 0.37452221]
|
|
|
|
mean value: 0.4122964859008789
|
|
|
|
key: score_time
|
|
value: [0.01205969 0.00659204 0.00670195 0.01243973 0.01200485 0.01204276
|
|
0.01203871 0.01203322 0.012043 0.01204824]
|
|
|
|
mean value: 0.011000418663024902
|
|
|
|
key: test_mcc
|
|
value: [0.2 nan nan 0.5 0.6 0.81649658
|
|
0.40824829 0.65465367 0.21821789 0.6 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.6 nan nan 0.7 0.8 0.9 0.7 0.8 0.6 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.6 nan nan 0.76923077 0.8 0.88888889
|
|
0.66666667 0.83333333 0.66666667 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.6 nan nan 0.625 0.8 1.
|
|
0.75 0.71428571 0.57142857 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.6 nan nan 1. 0.8 0.8 0.6 1. 0.8 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 nan nan 0.7 0.8 0.9 0.7 0.8 0.6 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[0.42857143 nan nan 0.625 0.66666667 0.8
|
|
0.5 0.71428571 0.5 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0144136 0.01382351 0.01106048 0.01080775 0.0104351 0.01044345
|
|
0.01019263 0.00999427 0.0097239 0.01033235]
|
|
|
|
mean value: 0.011122703552246094
|
|
|
|
key: score_time
|
|
value: [0.01140714 0.00496936 0.00459981 0.00874352 0.00843453 0.00867224
|
|
0.00845146 0.0083878 0.00837374 0.00837708]
|
|
|
|
mean value: 0.008041667938232421
|
|
|
|
key: test_mcc
|
|
value: [1. nan nan 0.6 0.81649658 0.6
|
|
0.65465367 0.2 0.40824829 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. nan nan 0.8 0.9 0.8 0.8 0.6 0.7 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. nan nan 0.8 0.90909091 0.8
|
|
0.75 0.6 0.72727273 0.88888889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. nan nan 0.8 0.83333333 0.8
|
|
1. 0.6 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. nan nan 0.8 1. 0.8 0.6 0.6 0.8 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. nan nan 0.8 0.9 0.8 0.8 0.6 0.7 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. nan nan 0.66666667 0.83333333 0.66666667
|
|
0.6 0.42857143 0.57142857 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.68
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08056545 0.08084106 0.08170795 0.08255649 0.08109879 0.08101583
|
|
0.08092213 0.08285975 0.08272982 0.08114171]
|
|
|
|
mean value: 0.0815438985824585
|
|
|
|
key: score_time
|
|
value: [0.01655602 0.00445127 0.0046792 0.01720977 0.01671934 0.01684332
|
|
0.01761675 0.01713228 0.0171361 0.01674366]
|
|
|
|
mean value: 0.014508771896362304
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 nan nan 0.6 0.81649658 0.65465367
|
|
0.2 0.5 0.21821789 0.65465367]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 nan nan 0.8 0.9 0.8 0.6 0.7 0.6 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 nan nan 0.8 0.90909091 0.75
|
|
0.6 0.76923077 0.66666667 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. nan nan 0.8 0.83333333 1.
|
|
0.6 0.625 0.57142857 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8 nan nan 0.8 1. 0.6 0.6 1. 0.8 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 nan nan 0.8 0.9 0.8 0.6 0.7 0.6 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.8 nan nan 0.66666667 0.83333333 0.6
|
|
0.42857143 0.625 0.5 0.6 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00848937 0.00846291 0.00848842 0.00857568 0.00871515 0.00847149
|
|
0.0084784 0.00839734 0.00870109 0.00886941]
|
|
|
|
mean value: 0.008564925193786621
|
|
|
|
key: score_time
|
|
value: [0.00837517 0.00423932 0.00420213 0.00869727 0.00871015 0.00843763
|
|
0.00841641 0.00845981 0.00853753 0.00839472]
|
|
|
|
mean value: 0.007647013664245606
|
|
|
|
key: test_mcc
|
|
value: [0.40824829 nan nan 0.40824829 0.65465367 0.65465367
|
|
0.40824829 0.33333333 0.81649658 0.6 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.7 nan nan 0.7 0.8 0.8 0.7 0.6 0.9 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 nan nan 0.66666667 0.75 0.75
|
|
0.66666667 0.71428571 0.88888889 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 nan nan 0.75 1. 1.
|
|
0.75 0.55555556 1. 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.6 nan nan 0.6 0.6 0.6 0.6 1. 0.8 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 nan nan 0.7 0.8 0.8 0.7 0.6 0.9 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 nan nan 0.5 0.6 0.6
|
|
0.5 0.55555556 0.8 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.06
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.02116418 1.0257473 1.08174706 1.0322454 1.0248096 1.0253284
|
|
1.02173543 1.02932763 1.02692318 1.02705503]
|
|
|
|
mean value: 1.0316083192825318
|
|
|
|
key: score_time
|
|
value: [0.09387398 0.00442934 0.0045464 0.09425235 0.0929327 0.09006643
|
|
0.09262896 0.09255028 0.08933902 0.09436941]
|
|
|
|
mean value: 0.07489888668060303
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 nan nan 0.65465367 0.81649658 0.81649658
|
|
0.65465367 0.65465367 0.40824829 0.65465367]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 nan nan 0.8 0.9 0.9 0.8 0.8 0.7 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 nan nan 0.75 0.90909091 0.88888889
|
|
0.75 0.83333333 0.72727273 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. nan nan 1. 0.83333333 1.
|
|
1. 0.71428571 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8 nan nan 0.6 1. 0.8 0.6 1. 0.8 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 nan nan 0.8 0.9 0.9 0.8 0.8 0.7 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.8 nan nan 0.6 0.83333333 0.8
|
|
0.6 0.71428571 0.57142857 0.6 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.54
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.80205727 0.85240149 0.84424591 0.98081374 0.8455205 0.89721966
|
|
0.84490728 0.85584474 0.9097147 0.88582087]
|
|
|
|
mean value: 0.8718546152114868
|
|
|
|
key: score_time
|
|
value: [0.20501709 0.00500822 0.0045805 0.19695258 0.12441659 0.21853828
|
|
0.22464013 0.22292161 0.20894432 0.14386797]
|
|
|
|
mean value: 0.1554887294769287
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 nan nan 0.65465367 1. 0.81649658
|
|
0.65465367 0.65465367 0.40824829 0.65465367]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 0.97801929 0.97801929 0.95555556 0.97801929 0.97801929
|
|
0.97801929 0.97801929 1. 0.97801929]
|
|
|
|
mean value: 0.9801690612461116
|
|
|
|
key: test_accuracy
|
|
value: [0.9 nan nan 0.8 1. 0.9 0.8 0.8 0.7 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.98888889 0.98888889 0.97777778 0.98888889 0.98888889
|
|
0.98888889 0.98888889 1. 0.98888889]
|
|
|
|
mean value: 0.99
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 nan nan 0.75 1. 0.88888889
|
|
0.75 0.83333333 0.72727273 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 0.98901099 0.98901099 0.97777778 0.98901099 0.98876404
|
|
0.98901099 0.98901099 1. 0.98901099]
|
|
|
|
mean value: 0.9900607756787532
|
|
|
|
key: test_precision
|
|
value: [0.83333333 nan nan 1. 1. 1.
|
|
1. 0.71428571 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 0.97826087 0.97826087 0.97777778 0.97826087 1.
|
|
0.97826087 0.97826087 1. 0.97826087]
|
|
|
|
mean value: 0.9847342995169082
|
|
|
|
key: test_recall
|
|
value: [1. nan nan 0.6 1. 0.8 0.6 1. 0.8 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 0.97777778 1. 0.97777778
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9955555555555555
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 nan nan 0.8 1. 0.9 0.8 0.8 0.7 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.98888889 0.98888889 0.97777778 0.98888889 0.98888889
|
|
0.98888889 0.98888889 1. 0.98888889]
|
|
|
|
mean value: 0.99
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 nan nan 0.6 1. 0.8
|
|
0.6 0.71428571 0.57142857 0.6 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 0.97826087 0.97826087 0.95652174 0.97826087 0.97777778
|
|
0.97826087 0.97826087 1. 0.97826087]
|
|
|
|
mean value: 0.9803864734299517
|
|
|
|
MCC on Blind test: 0.6
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02338266 0.0094502 0.00960112 0.01006866 0.00941682 0.00956082
|
|
0.00956559 0.00960374 0.0094769 0.00946879]
|
|
|
|
mean value: 0.010959529876708984
|
|
|
|
key: score_time
|
|
value: [0.0109334 0.00468445 0.00506949 0.00934649 0.00933862 0.00940108
|
|
0.00930977 0.00933194 0.00933313 0.00930548]
|
|
|
|
mean value: 0.008605384826660156
|
|
|
|
key: test_mcc
|
|
value: [0.21821789 nan nan 0.21821789 0.6 0.40824829
|
|
0.40824829 0.65465367 0. 0.40824829]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.73624773 0.71269665 0.67082039 0.8001976 0.76026311 0.75724019
|
|
0.73333333 0.68957028 0.8001976 0.69162666]
|
|
|
|
mean value: 0.7352193554584638
|
|
|
|
key: test_accuracy
|
|
value: [0.6 nan nan 0.6 0.8 0.7 0.7 0.8 0.5 0.7]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.86666667 0.85555556 0.83333333 0.9 0.87777778 0.87777778
|
|
0.86666667 0.84444444 0.9 0.84444444]
|
|
|
|
mean value: 0.8666666666666667
|
|
|
|
key: test_fscore
|
|
value: [0.5 nan nan 0.66666667 0.8 0.66666667
|
|
0.66666667 0.83333333 0.54545455 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.87234043 0.86021505 0.84210526 0.9010989 0.88421053 0.88172043
|
|
0.86666667 0.84782609 0.8988764 0.85106383]
|
|
|
|
mean value: 0.8706123587880272
|
|
|
|
key: test_precision
|
|
value: [0.66666667 nan nan 0.57142857 0.8 0.75
|
|
0.75 0.71428571 0.5 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[0.83673469 0.83333333 0.8 0.89130435 0.84 0.85416667
|
|
0.86666667 0.82978723 0.90909091 0.81632653]
|
|
|
|
mean value: 0.8477410382116012
|
|
|
|
key: test_recall
|
|
value: [0.4 nan nan 0.8 0.8 0.6 0.6 1. 0.6 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.91111111 0.88888889 0.88888889 0.91111111 0.93333333 0.91111111
|
|
0.86666667 0.86666667 0.88888889 0.88888889]
|
|
|
|
mean value: 0.8955555555555555
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 nan nan 0.6 0.8 0.7 0.7 0.8 0.5 0.7]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.86666667 0.85555556 0.83333333 0.9 0.87777778 0.87777778
|
|
0.86666667 0.84444444 0.9 0.84444444]
|
|
|
|
mean value: 0.8666666666666667
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 nan nan 0.5 0.66666667 0.5
|
|
0.5 0.71428571 0.375 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.77358491 0.75471698 0.72727273 0.82 0.79245283 0.78846154
|
|
0.76470588 0.73584906 0.81632653 0.74074074]
|
|
|
|
mean value: 0.7714111193025098
|
|
|
|
MCC on Blind test: 0.03
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.03791428 0.039891 0.04524422 0.05228448 0.03879333 0.04074168
|
|
0.03861666 0.04192305 0.03786802 0.04126 ]
|
|
|
|
mean value: 0.0414536714553833
|
|
|
|
key: score_time
|
|
value: [0.01134562 0.0051043 0.00507307 0.01123905 0.01100111 0.0111165
|
|
0.01112223 0.01104426 0.0112226 0.01109934]
|
|
|
|
mean value: 0.009936809539794922
|
|
|
|
key: test_mcc
|
|
value: [1. nan nan 1. 1. 0.6
|
|
0.81649658 0.6 0.5 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. nan nan 1. 1. 0.8 0.9 0.8 0.7 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. nan nan 1. 1. 0.8
|
|
0.88888889 0.8 0.76923077 0.88888889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. nan nan 1. 1. 0.8 1. 0.8 0.625 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. nan nan 1. 1. 0.8 0.8 0.8 1. 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. nan nan 1. 1. 0.8 0.9 0.8 0.7 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. nan nan 1. 1. 0.66666667
|
|
0.8 0.66666667 0.625 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.89
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02424765 0.03926849 0.02169585 0.02134705 0.03105235 0.01888251
|
|
0.01905227 0.04464221 0.01857352 0.03482223]
|
|
|
|
mean value: 0.027358412742614746
|
|
|
|
key: score_time
|
|
value: [0.02080226 0.00596833 0.00588465 0.01170087 0.01165652 0.01164246
|
|
0.01558542 0.01165223 0.01164913 0.02148533]
|
|
|
|
mean value: 0.012802720069885254
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 nan nan 0.65465367 0.81649658 0.2
|
|
0.21821789 0.6 0.40824829 0.40824829]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 nan nan 0.8 0.9 0.6 0.6 0.8 0.7 0.7]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 nan nan 0.83333333 0.90909091 0.6
|
|
0.66666667 0.8 0.72727273 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.71428571 nan nan 0.71428571 0.83333333 0.6
|
|
0.57142857 0.8 0.66666667 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. nan nan 1. 1. 0.6 0.8 0.8 0.8 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 nan nan 0.8 0.9 0.6 0.6 0.8 0.7 0.7]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 nan nan 0.71428571 0.83333333 0.42857143
|
|
0.5 0.66666667 0.57142857 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.18
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[0.01784444 0.00874496 0.00863242 0.00858641 0.00845957 0.00878644
|
|
0.00931096 0.00858283 0.00858426 0.0092175 ]
|
|
|
|
mean value: 0.009674978256225587
|
|
|
|
key: score_time
|
|
value: [0.00886011 0.00435925 0.0042181 0.00851774 0.00852823 0.00907302
|
|
0.00854731 0.00856113 0.00867009 0.00898337]
|
|
|
|
mean value: 0.007831835746765136
|
|
|
|
key: test_mcc
|
|
value: [0. nan nan 0.40824829 0.2 0.81649658
|
|
0.40824829 0.81649658 0.21821789 0.6 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.68957028 0.69162666 0.67082039 0.68957028 0.69162666 0.64508188
|
|
0.73405869 0.68957028 0.75724019 0.6681531 ]
|
|
|
|
mean value: 0.6927318416110331
|
|
|
|
key: test_accuracy
|
|
value: [0.5 nan nan 0.7 0.6 0.9 0.7 0.9 0.6 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.84444444 0.84444444 0.83333333 0.84444444 0.84444444 0.82222222
|
|
0.86666667 0.84444444 0.87777778 0.83333333]
|
|
|
|
mean value: 0.8455555555555556
|
|
|
|
key: test_fscore
|
|
value: [0.54545455 nan nan 0.72727273 0.6 0.88888889
|
|
0.66666667 0.90909091 0.66666667 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.84090909 0.85106383 0.84210526 0.84782609 0.85106383 0.82608696
|
|
0.86956522 0.84782609 0.87356322 0.83870968]
|
|
|
|
mean value: 0.84887192572777
|
|
|
|
key: test_precision
|
|
value: [0.5 nan nan 0.66666667 0.6 1.
|
|
0.75 0.83333333 0.57142857 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.86046512 0.81632653 0.8 0.82978723 0.81632653 0.80851064
|
|
0.85106383 0.82978723 0.9047619 0.8125 ]
|
|
|
|
mean value: 0.8329529018435677
|
|
|
|
key: test_recall
|
|
value: [0.6 nan nan 0.8 0.6 0.8 0.6 1. 0.8 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.82222222 0.88888889 0.88888889 0.86666667 0.88888889 0.84444444
|
|
0.88888889 0.86666667 0.84444444 0.86666667]
|
|
|
|
mean value: 0.8666666666666667
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 nan nan 0.7 0.6 0.9 0.7 0.9 0.6 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.84444444 0.84444444 0.83333333 0.84444444 0.84444444 0.82222222
|
|
0.86666667 0.84444444 0.87777778 0.83333333]
|
|
|
|
mean value: 0.8455555555555556
|
|
|
|
key: test_jcc
|
|
value: [0.375 nan nan 0.57142857 0.42857143 0.8
|
|
0.5 0.83333333 0.5 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.7254902 0.74074074 0.72727273 0.73584906 0.74074074 0.7037037
|
|
0.76923077 0.73584906 0.7755102 0.72222222]
|
|
|
|
mean value: 0.7376609417278515
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00957155 0.01297784 0.01338482 0.01337719 0.01387429 0.01330495
|
|
0.01264739 0.01446295 0.01269341 0.01309991]
|
|
|
|
mean value: 0.01293942928314209
|
|
|
|
key: score_time
|
|
value: [0.0086 0.00586104 0.00588679 0.01135945 0.01134872 0.01132274
|
|
0.01132727 0.01135826 0.01136208 0.01132488]
|
|
|
|
mean value: 0.009975123405456542
|
|
|
|
key: test_mcc
|
|
value: [0.40824829 nan nan 0.81649658 0.6 0.81649658
|
|
0.40824829 0.65465367 0.21821789 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.95555556 0.93541435 0.93541435 1. 0.95650071 0.97801929
|
|
0.93541435 0.97801929 0.95555556 0.93541435]
|
|
|
|
mean value: 0.9565307800167633
|
|
|
|
key: test_accuracy
|
|
value: [0.7 nan nan 0.9 0.8 0.9 0.7 0.8 0.6 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.97777778 0.96666667 0.96666667 1. 0.97777778 0.98888889
|
|
0.96666667 0.98888889 0.97777778 0.96666667]
|
|
|
|
mean value: 0.9777777777777777
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 nan nan 0.90909091 0.8 0.88888889
|
|
0.66666667 0.83333333 0.66666667 0.90909091]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.97777778 0.96774194 0.96774194 1. 0.97826087 0.98901099
|
|
0.96774194 0.98876404 0.97777778 0.96774194]
|
|
|
|
mean value: 0.9782559201011066
|
|
|
|
key: test_precision
|
|
value: [0.66666667 nan nan 0.83333333 0.8 1.
|
|
0.75 0.71428571 0.57142857 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.97777778 0.9375 0.9375 1. 0.95744681 0.97826087
|
|
0.9375 1. 0.97777778 0.9375 ]
|
|
|
|
mean value: 0.9641263233631411
|
|
|
|
key: test_recall
|
|
value: [0.8 nan nan 1. 0.8 0.8 0.6 1. 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.97777778 1. 1. 1. 1. 1.
|
|
1. 0.97777778 0.97777778 1. ]
|
|
|
|
mean value: 0.9933333333333333
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 nan nan 0.9 0.8 0.9 0.7 0.8 0.6 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.97777778 0.96666667 0.96666667 1. 0.97777778 0.98888889
|
|
0.96666667 0.98888889 0.97777778 0.96666667]
|
|
|
|
mean value: 0.9777777777777777
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 nan nan 0.83333333 0.66666667 0.8
|
|
0.5 0.71428571 0.5 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.95652174 0.9375 0.9375 1. 0.95744681 0.97826087
|
|
0.9375 0.97777778 0.95652174 0.9375 ]
|
|
|
|
mean value: 0.9576528934114503
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01255083 0.01222014 0.01237512 0.0122931 0.01268792 0.01234984
|
|
0.01206565 0.01226902 0.01228833 0.01291275]
|
|
|
|
mean value: 0.012401270866394042
|
|
|
|
key: score_time
|
|
value: [0.01079416 0.00594401 0.00597167 0.01558065 0.01139426 0.01133919
|
|
0.01131606 0.01139021 0.01131845 0.01140761]
|
|
|
|
mean value: 0.010645627975463867
|
|
|
|
key: test_mcc
|
|
value: [0. nan nan 0.40824829 0.6 0.81649658
|
|
0.40824829 1. 0.21821789 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.83553169 0.65465367 0.85485041 0.70710678 0.97801929 0.97801929
|
|
0.93541435 0.89442719 0.97801929 0.6894997 ]
|
|
|
|
mean value: 0.8505541676741646
|
|
|
|
key: test_accuracy
|
|
value: [0.5 nan nan 0.7 0.8 0.9 0.7 1. 0.6 0.7]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.91111111 0.8 0.92222222 0.83333333 0.98888889 0.98888889
|
|
0.96666667 0.94444444 0.98888889 0.82222222]
|
|
|
|
mean value: 0.9166666666666667
|
|
|
|
key: test_fscore
|
|
value: [0.44444444 nan nan 0.66666667 0.8 0.88888889
|
|
0.66666667 1. 0.66666667 0.76923077]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.90243902 0.75 0.91566265 0.8 0.98876404 0.98876404
|
|
0.96774194 0.94117647 0.98901099 0.8490566 ]
|
|
|
|
mean value: 0.9092615763736974
|
|
|
|
key: test_precision
|
|
value: [0.5 nan nan 0.75 0.8 1.
|
|
0.75 1. 0.57142857 0.625 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.9375 1. 0.97826087 0.73770492]
|
|
|
|
mean value: 0.9653465787598005
|
|
|
|
key: test_recall
|
|
value: [0.4 nan nan 0.6 0.8 0.8 0.6 1. 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.82222222 0.6 0.84444444 0.66666667 0.97777778 0.97777778
|
|
1. 0.88888889 1. 1. ]
|
|
|
|
mean value: 0.8777777777777778
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 nan nan 0.7 0.8 0.9 0.7 1. 0.6 0.7]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.91111111 0.8 0.92222222 0.83333333 0.98888889 0.98888889
|
|
0.96666667 0.94444444 0.98888889 0.82222222]
|
|
|
|
mean value: 0.9166666666666666
|
|
|
|
key: test_jcc
|
|
value: [0.28571429 nan nan 0.5 0.66666667 0.8
|
|
0.5 1. 0.5 0.625 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.82222222 0.6 0.84444444 0.66666667 0.97777778 0.97777778
|
|
0.9375 0.88888889 0.97826087 0.73770492]
|
|
|
|
mean value: 0.8431243565375782
|
|
|
|
MCC on Blind test: 0.01
|
|
|
|
Accuracy on Blind test: 0.38
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09700513 0.08642244 0.08795667 0.08845925 0.08627391 0.08648276
|
|
0.08725667 0.08758473 0.08776522 0.08776069]
|
|
|
|
mean value: 0.08829674720764161
|
|
|
|
key: score_time
|
|
value: [0.01467133 0.00474548 0.00496674 0.01501131 0.01460814 0.01463032
|
|
0.01517797 0.01503372 0.01542163 0.01494527]
|
|
|
|
mean value: 0.01292119026184082
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 nan nan 1. 1. 0.6
|
|
1. 0.81649658 0.65465367 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 nan nan 1. 1. 0.8 1. 0.9 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 nan nan 1. 1. 0.8
|
|
1. 0.90909091 0.83333333 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.71428571 nan nan 1. 1. 0.8
|
|
1. 0.83333333 0.71428571 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. nan nan 1. 1. 0.8 1. 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 nan nan 1. 1. 0.8 1. 0.9 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 nan nan 1. 1. 0.66666667
|
|
1. 0.83333333 0.71428571 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03509688 0.04568958 0.0302496 0.03195786 0.02884889 0.03385544
|
|
0.05140829 0.04297328 0.03036499 0.0328958 ]
|
|
|
|
mean value: 0.03633406162261963
|
|
|
|
key: score_time
|
|
value: [0.02096653 0.00952625 0.00549054 0.02393532 0.02444291 0.02201509
|
|
0.03557968 0.02399278 0.02232742 0.02703047]
|
|
|
|
mean value: 0.021530699729919434
|
|
|
|
key: test_mcc
|
|
value: [1. nan nan 1. 1. 0.6
|
|
0.81649658 0.81649658 0.6 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.97801929 0.97801929 0.97801929 1. 1. 0.97801929
|
|
1. 1. 1. 0.97801929]
|
|
|
|
mean value: 0.9890096469218257
|
|
|
|
key: test_accuracy
|
|
value: [1. nan nan 1. 1. 0.8 0.9 0.9 0.8 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.98888889 0.98888889 0.98888889 1. 1. 0.98888889
|
|
1. 1. 1. 0.98888889]
|
|
|
|
mean value: 0.9944444444444445
|
|
|
|
key: test_fscore
|
|
value: [1. nan nan 1. 1. 0.8
|
|
0.88888889 0.88888889 0.8 0.88888889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.98876404 0.98876404 0.98876404 1. 1. 0.98876404
|
|
1. 1. 1. 0.98876404]
|
|
|
|
mean value: 0.9943820224719101
|
|
|
|
key: test_precision
|
|
value: [1. nan nan 1. 1. 0.8 1. 1. 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. nan nan 1. 1. 0.8 0.8 0.8 0.8 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.97777778 0.97777778 0.97777778 1. 1. 0.97777778
|
|
1. 1. 1. 0.97777778]
|
|
|
|
mean value: 0.9888888888888889
|
|
|
|
key: test_roc_auc
|
|
value: [1. nan nan 1. 1. 0.8 0.9 0.9 0.8 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.98888889 0.98888889 0.98888889 1. 1. 0.98888889
|
|
1. 1. 1. 0.98888889]
|
|
|
|
mean value: 0.9944444444444445
|
|
|
|
key: test_jcc
|
|
value: [1. nan nan 1. 1. 0.66666667
|
|
0.8 0.8 0.66666667 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[0.97777778 0.97777778 0.97777778 1. 1. 0.97777778
|
|
1. 1. 1. 0.97777778]
|
|
|
|
mean value: 0.9888888888888889
|
|
|
|
MCC on Blind test: 1.0
|
|
|
|
Accuracy on Blind test: 1.0
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01417232 0.01547766 0.02389431 0.02053976 0.0161953 0.01604009
|
|
0.02661467 0.01625609 0.01613426 0.01619077]
|
|
|
|
mean value: 0.018151521682739258
|
|
|
|
key: score_time
|
|
value: [0.01128316 0.00590849 0.00611544 0.01191759 0.01187563 0.01181817
|
|
0.01201296 0.01186419 0.0119288 0.01185203]
|
|
|
|
mean value: 0.010657644271850586
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 nan nan 0.40824829 0.40824829 0.65465367
|
|
0.2 0.5 0. 0.65465367]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 nan nan 0.7 0.7 0.8 0.6 0.7 0.5 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 nan nan 0.72727273 0.72727273 0.75
|
|
0.6 0.76923077 0.61538462 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 nan nan 0.66666667 0.66666667 1.
|
|
0.6 0.625 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. nan nan 0.8 0.8 0.6 0.6 1. 0.8 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 nan nan 0.7 0.7 0.8 0.6 0.7 0.5 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 nan nan 0.57142857 0.57142857 0.6
|
|
0.42857143 0.625 0.44444444 0.6 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.22346067 0.21064496 0.22775483 0.20194101 0.20313954 0.22486734
|
|
0.20193768 0.22959948 0.19129062 0.20147419]
|
|
|
|
mean value: 0.21161103248596191
|
|
|
|
key: score_time
|
|
value: [0.009547 0.00503063 0.00500417 0.00985026 0.01011467 0.01005173
|
|
0.00999117 0.01001072 0.01000142 0.00996733]
|
|
|
|
mean value: 0.0089569091796875
|
|
|
|
key: test_mcc
|
|
value: [1. nan nan 0.40824829 0.81649658 0.6
|
|
1. 0.81649658 0.40824829 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. nan nan 0.7 0.9 0.8 1. 0.9 0.7 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. nan nan 0.66666667 0.88888889 0.8
|
|
1. 0.88888889 0.72727273 0.88888889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. nan nan 0.75 1. 0.8
|
|
1. 1. 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. nan nan 0.6 0.8 0.8 1. 0.8 0.8 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. nan nan 0.7 0.9 0.8 1. 0.9 0.7 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. nan nan 0.5 0.8 0.66666667
|
|
1. 0.8 0.57142857 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.84
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01206875 0.01536727 0.01444435 0.0142355 0.01431799 0.01439047
|
|
0.01433372 0.01445889 0.01462865 0.01538968]
|
|
|
|
mean value: 0.014363527297973633
|
|
|
|
key: score_time
|
|
value: [0.01155353 0.00604582 0.0060041 0.01183033 0.01183915 0.01176929
|
|
0.0118506 0.01583171 0.01520729 0.01568818]
|
|
|
|
mean value: 0.011761999130249024
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 nan nan 0.21821789 0.81649658 0.40824829
|
|
0.6 0.65465367 0.40824829 0.21821789]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 nan nan 0.6 0.9 0.7 0.8 0.8 0.7 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 nan nan 0.66666667 0.90909091 0.66666667
|
|
0.8 0.83333333 0.72727273 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.71428571 nan nan 0.57142857 0.83333333 0.75
|
|
0.8 0.71428571 0.66666667 0.57142857]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. nan nan 0.8 1. 0.6 0.8 1. 0.8 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 nan nan 0.6 0.9 0.7 0.8 0.8 0.7 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 nan nan 0.5 0.83333333 0.5
|
|
0.66666667 0.71428571 0.57142857 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.14
|
|
|
|
Accuracy on Blind test: 0.42
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02940583 0.03634906 0.03245473 0.03262615 0.03238082 0.02871442
|
|
0.01274729 0.01276231 0.02434325 0.03222251]
|
|
|
|
mean value: 0.027400636672973634
|
|
|
|
key: score_time
|
|
value: [0.0238688 0.01215315 0.0120945 0.01822114 0.02281642 0.01169848
|
|
0.01157546 0.01160693 0.0200026 0.02159095]
|
|
|
|
mean value: 0.016562843322753908
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 nan nan 0.65465367 0.81649658 0.81649658
|
|
0.40824829 0.81649658 0.21821789 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 0.97801929 0.97801929 1. 0.97801929 1.
|
|
0.97801929 0.97801929 1. 0.97801929]
|
|
|
|
mean value: 0.9868115763061909
|
|
|
|
key: test_accuracy
|
|
value: [0.9 nan nan 0.8 0.9 0.9 0.7 0.9 0.6 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.98888889 0.98888889 1. 0.98888889 1.
|
|
0.98888889 0.98888889 1. 0.98888889]
|
|
|
|
mean value: 0.9933333333333334
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 nan nan 0.83333333 0.90909091 0.88888889
|
|
0.66666667 0.90909091 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 0.98901099 0.98901099 1. 0.98901099 1.
|
|
0.98901099 0.98901099 1. 0.98901099]
|
|
|
|
mean value: 0.9934065934065934
|
|
|
|
key: test_precision
|
|
value: [0.83333333 nan nan 0.71428571 0.83333333 1.
|
|
0.75 0.83333333 0.57142857 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 0.97826087 0.97826087 1. 0.97826087 1.
|
|
0.97826087 0.97826087 1. 0.97826087]
|
|
|
|
mean value: 0.9869565217391304
|
|
|
|
key: test_recall
|
|
value: [1. nan nan 1. 1. 0.8 0.6 1. 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 nan nan 0.8 0.9 0.9 0.7 0.9 0.6 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.98888889 0.98888889 1. 0.98888889 1.
|
|
0.98888889 0.98888889 1. 0.98888889]
|
|
|
|
mean value: 0.9933333333333333
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 nan nan 0.71428571 0.83333333 0.8
|
|
0.5 0.83333333 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 0.97826087 0.97826087 1. 0.97826087 1.
|
|
0.97826087 0.97826087 1. 0.97826087]
|
|
|
|
mean value: 0.9869565217391304
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:136: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:139: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.10528541 0.17085743 0.17106438 0.21791244 0.18295407 0.1117785
|
|
0.12498665 0.18348122 0.09826803 0.16421723]
|
|
|
|
mean value: 0.15308053493499757
|
|
|
|
key: score_time
|
|
value: [0.01189971 0.01263165 0.01266694 0.02156377 0.01987004 0.01182532
|
|
0.0199995 0.02175546 0.01192117 0.02374363]
|
|
|
|
mean value: 0.0167877197265625
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 nan nan 0.65465367 0.81649658 0.81649658
|
|
0.40824829 0.81649658 0.21821789 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 0.97801929 0.97801929 1. 0.97801929 1.
|
|
0.97801929 0.97801929 1. 0.97801929]
|
|
|
|
mean value: 0.9868115763061909
|
|
|
|
key: test_accuracy
|
|
value: [0.9 nan nan 0.8 0.9 0.9 0.7 0.9 0.6 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.98888889 0.98888889 1. 0.98888889 1.
|
|
0.98888889 0.98888889 1. 0.98888889]
|
|
|
|
mean value: 0.9933333333333334
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 nan nan 0.83333333 0.90909091 0.88888889
|
|
0.66666667 0.90909091 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 0.98901099 0.98901099 1. 0.98901099 1.
|
|
0.98901099 0.98901099 1. 0.98901099]
|
|
|
|
mean value: 0.9934065934065934
|
|
|
|
key: test_precision
|
|
value: [0.83333333 nan nan 0.71428571 0.83333333 1.
|
|
0.75 0.83333333 0.57142857 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 0.97826087 0.97826087 1. 0.97826087 1.
|
|
0.97826087 0.97826087 1. 0.97826087]
|
|
|
|
mean value: 0.9869565217391304
|
|
|
|
key: test_recall
|
|
value: [1. nan nan 1. 1. 0.8 0.6 1. 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 nan nan 0.8 0.9 0.9 0.7 0.9 0.6 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.98888889 0.98888889 1. 0.98888889 1.
|
|
0.98888889 0.98888889 1. 0.98888889]
|
|
|
|
mean value: 0.9933333333333333
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 nan nan 0.71428571 0.83333333 0.8
|
|
0.5 0.83333333 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 0.97826087 0.97826087 1. 0.97826087 1.
|
|
0.97826087 0.97826087 1. 0.97826087]
|
|
|
|
mean value: 0.9869565217391304
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0232265 0.02718496 0.02470827 0.04355407 0.02657723 0.02697539
|
|
0.02654147 0.02200747 0.03470922 0.02654457]
|
|
|
|
mean value: 0.028202915191650392
|
|
|
|
key: score_time
|
|
value: [0.01154375 0.01186514 0.00623894 0.01168394 0.01151109 0.01170611
|
|
0.01173449 0.01145792 0.01147342 0.01157451]
|
|
|
|
mean value: 0.011078929901123047
|
|
|
|
key: test_mcc
|
|
value: [0.40824829 0.65465367 nan 0.40824829 0.81649658 0.81649658
|
|
0.40824829 0.65465367 0.21821789 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.88910845 0.91111111 0.88910845 0.91111111 0.93356387 0.88910845
|
|
0.93356387 0.88910845 0.95555556 0.88910845]
|
|
|
|
mean value: 0.9090447765314074
|
|
|
|
key: test_accuracy
|
|
value: [0.7 0.8 nan 0.7 0.9 0.9 0.7 0.8 0.6 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.94444444 0.95555556 0.94444444 0.95555556 0.96666667 0.94444444
|
|
0.96666667 0.94444444 0.97777778 0.94444444]
|
|
|
|
mean value: 0.9544444444444444
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.83333333 nan 0.72727273 0.90909091 0.88888889
|
|
0.66666667 0.83333333 0.66666667 0.90909091]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.94505495 0.95555556 0.94382022 0.95555556 0.96703297 0.94382022
|
|
0.96703297 0.94382022 0.97777778 0.94505495]
|
|
|
|
mean value: 0.9544525387222017
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.71428571 nan 0.66666667 0.83333333 1.
|
|
0.75 0.71428571 0.57142857 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.93478261 0.95555556 0.95454545 0.95555556 0.95652174 0.95454545
|
|
0.95652174 0.95454545 0.97777778 0.93478261]
|
|
|
|
mean value: 0.9535133948177427
|
|
|
|
key: test_recall
|
|
value: [0.8 1. nan 0.8 1. 0.8 0.6 1. 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
|
|
value: [0.95555556 0.95555556 0.93333333 0.95555556 0.97777778 0.93333333
|
|
0.97777778 0.93333333 0.97777778 0.95555556]
|
|
|
|
mean value: 0.9555555555555556
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 0.8 nan 0.7 0.9 0.9 0.7 0.8 0.6 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.94444444 0.95555556 0.94444444 0.95555556 0.96666667 0.94444444
|
|
0.96666667 0.94444444 0.97777778 0.94444444]
|
|
|
|
mean value: 0.9544444444444445
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.71428571 nan 0.57142857 0.83333333 0.8
|
|
0.5 0.71428571 0.5 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.89583333 0.91489362 0.89361702 0.91489362 0.93617021 0.89361702
|
|
0.93617021 0.89361702 0.95652174 0.89583333]
|
|
|
|
mean value: 0.9131167129201356
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.67746806 0.59781671 0.69200182 0.71163416 0.61749101 0.63935566
|
|
0.77134275 0.60597873 0.80454826 0.74892688]
|
|
|
|
mean value: 0.686656403541565
|
|
|
|
key: score_time
|
|
value: [0.01449347 0.01300931 0.00646234 0.0151124 0.01494288 0.01490331
|
|
0.01547623 0.0119102 0.01237154 0.01246715]
|
|
|
|
mean value: 0.01311488151550293
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 0.81649658 nan 0.2 0.65465367 1.
|
|
0.65465367 0.65465367 0.21821789 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 0.95650071 1. 1. ]
|
|
|
|
mean value: 0.9956500714595278
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.9 nan 0.6 0.8 1. 0.8 0.8 0.6 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 0.97777778 1. 1. ]
|
|
|
|
mean value: 0.9977777777777778
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.90909091 nan 0.6 0.83333333 1.
|
|
0.75 0.83333333 0.66666667 0.90909091]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 0.97826087 1. 1. ]
|
|
|
|
mean value: 0.9978260869565218
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.83333333 nan 0.6 0.71428571 1.
|
|
1. 0.71428571 0.57142857 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 0.95744681 1. 1. ]
|
|
|
|
mean value: 0.9957446808510638
|
|
|
|
key: test_recall
|
|
value: [1. 1. nan 0.6 1. 1. 0.6 1. 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.9 nan 0.6 0.8 1. 0.8 0.8 0.6 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 0.97777778 1. 1. ]
|
|
|
|
mean value: 0.9977777777777778
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.83333333 nan 0.42857143 0.71428571 1.
|
|
0.6 0.71428571 0.5 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 0.95744681 1. 1. ]
|
|
|
|
mean value: 0.9957446808510638
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01217723 0.00952673 0.00885081 0.00973725 0.00852966 0.00841856
|
|
0.00850034 0.0093751 0.0097971 0.00986147]
|
|
|
|
mean value: 0.00947742462158203
|
|
|
|
key: score_time
|
|
value: [0.01179314 0.00913548 0.00443888 0.00915837 0.00861049 0.00860596
|
|
0.00853419 0.00947094 0.00863123 0.0086143 ]
|
|
|
|
mean value: 0.008699297904968262
|
|
|
|
key: test_mcc
|
|
value: [0. 0.40824829 nan 0.2 0.5 0.5
|
|
0. 0.40824829 0.21821789 0.21821789]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.43305953 0.66097134 0.68957028 0.82548988 0.60971232 0.56980288
|
|
0.73405869 0.6350529 0.73624773 0.56454844]
|
|
|
|
mean value: 0.6458513998944028
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.7 nan 0.6 0.7 0.7 0.5 0.7 0.6 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.68888889 0.82222222 0.84444444 0.91111111 0.8 0.77777778
|
|
0.86666667 0.81111111 0.86666667 0.77777778]
|
|
|
|
mean value: 0.8166666666666667
|
|
|
|
key: test_fscore
|
|
value: [0.61538462 0.72727273 nan 0.6 0.76923077 0.76923077
|
|
0.44444444 0.72727273 0.66666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.75 0.84 0.84782609 0.91489362 0.81632653 0.8
|
|
0.86956522 0.82828283 0.86046512 0.79591837]
|
|
|
|
mean value: 0.8323277763890184
|
|
|
|
key: test_precision
|
|
value: [0.5 0.66666667 nan 0.6 0.625 0.625
|
|
0.5 0.66666667 0.57142857 0.57142857]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.62686567 0.76363636 0.82978723 0.87755102 0.75471698 0.72727273
|
|
0.85106383 0.75925926 0.90243902 0.73584906]
|
|
|
|
mean value: 0.7828441168174185
|
|
|
|
key: test_recall
|
|
value: [0.8 0.8 nan 0.6 1. 1. 0.4 0.8 0.8 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.93333333 0.93333333 0.86666667 0.95555556 0.88888889 0.88888889
|
|
0.88888889 0.91111111 0.82222222 0.86666667]
|
|
|
|
mean value: 0.8955555555555555
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.7 nan 0.6 0.7 0.7 0.5 0.7 0.6 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.68888889 0.82222222 0.84444444 0.91111111 0.8 0.77777778
|
|
0.86666667 0.81111111 0.86666667 0.77777778]
|
|
|
|
mean value: 0.8166666666666667
|
|
|
|
key: test_jcc
|
|
value: [0.44444444 0.57142857 nan 0.42857143 0.625 0.625
|
|
0.28571429 0.57142857 0.5 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.6 0.72413793 0.73584906 0.84313725 0.68965517 0.66666667
|
|
0.76923077 0.70689655 0.75510204 0.66101695]
|
|
|
|
mean value: 0.7151692392544453
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00936604 0.00989652 0.00981975 0.00979877 0.00879645 0.00878477
|
|
0.00904775 0.0089612 0.00923443 0.00946689]
|
|
|
|
mean value: 0.009317255020141602
|
|
|
|
key: score_time
|
|
value: [0.0095799 0.00932693 0.0049901 0.00928187 0.00873828 0.00865293
|
|
0.00907826 0.0087533 0.00945044 0.00921941]
|
|
|
|
mean value: 0.008707141876220703
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 0.21821789 nan 0. 0.40824829 0.65465367
|
|
0.21821789 0. 0.40824829 0.40824829]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.57906602 0.73624773 0.57906602 0.60540551 0.60238451 0.56056066
|
|
0.64700558 0.67082039 0.62609903 0.64700558]
|
|
|
|
mean value: 0.6253661066190971
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.6 nan 0.5 0.7 0.8 0.6 0.5 0.7 0.7]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.78888889 0.86666667 0.78888889 0.8 0.8 0.77777778
|
|
0.82222222 0.83333333 0.81111111 0.82222222]
|
|
|
|
mean value: 0.8111111111111111
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.66666667 nan 0.54545455 0.72727273 0.75
|
|
0.5 0.44444444 0.66666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.7816092 0.86046512 0.7816092 0.78571429 0.79069767 0.76190476
|
|
0.81395349 0.82352941 0.8 0.81395349]
|
|
|
|
mean value: 0.8013436617630212
|
|
|
|
key: test_precision
|
|
value: [1. 0.57142857 nan 0.5 0.66666667 1.
|
|
0.66666667 0.5 0.75 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.80952381 0.90243902 0.80952381 0.84615385 0.82926829 0.82051282
|
|
0.85365854 0.875 0.85 0.85365854]
|
|
|
|
mean value: 0.8449738675958188
|
|
|
|
key: test_recall
|
|
value: [0.6 0.8 nan 0.6 0.8 0.6 0.4 0.4 0.6 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.75555556 0.82222222 0.75555556 0.73333333 0.75555556 0.71111111
|
|
0.77777778 0.77777778 0.75555556 0.77777778]
|
|
|
|
mean value: 0.7622222222222222
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.6 nan 0.5 0.7 0.8 0.6 0.5 0.7 0.7]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.78888889 0.86666667 0.78888889 0.8 0.8 0.77777778
|
|
0.82222222 0.83333333 0.81111111 0.82222222]
|
|
|
|
mean value: 0.8111111111111111
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.5 nan 0.375 0.57142857 0.6
|
|
0.33333333 0.28571429 0.5 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.64150943 0.75510204 0.64150943 0.64705882 0.65384615 0.61538462
|
|
0.68627451 0.7 0.66666667 0.68627451]
|
|
|
|
mean value: 0.6693626187775545
|
|
|
|
MCC on Blind test: 0.18
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00885844 0.00982785 0.00806379 0.00800562 0.00807977 0.00805044
|
|
0.00798988 0.00812101 0.00803423 0.00798821]
|
|
|
|
mean value: 0.008301925659179688
|
|
|
|
key: score_time
|
|
value: [0.01715279 0.01941204 0.00412107 0.00909305 0.00904918 0.00910521
|
|
0.01407719 0.01381826 0.01290107 0.01411104]
|
|
|
|
mean value: 0.012284088134765624
|
|
|
|
key: test_mcc
|
|
value: [ 0.81649658 0.6 nan 0.40824829 0.40824829 0.
|
|
0.40824829 0. -0.33333333 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.6 0.62237591 0.66683134 0.60059347 0.67082039 0.53990552
|
|
0.64444444 0.57906602 0.71128676 0.51111111]
|
|
|
|
mean value: 0.6146434979838011
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.8 nan 0.7 0.7 0.5 0.7 0.5 0.4 0.5]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.8 0.81111111 0.83333333 0.8 0.83333333 0.76666667
|
|
0.82222222 0.78888889 0.85555556 0.75555556]
|
|
|
|
mean value: 0.8066666666666666
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 0.8 nan 0.66666667 0.72727273 0.44444444
|
|
0.66666667 0.54545455 0.57142857 0.44444444]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.8 0.81318681 0.83516484 0.79545455 0.84210526 0.78350515
|
|
0.82222222 0.7816092 0.85714286 0.75555556]
|
|
|
|
mean value: 0.8085946441926197
|
|
|
|
key: test_precision
|
|
value: [1. 0.8 nan 0.75 0.66666667 0.5
|
|
0.75 0.5 0.44444444 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.8 0.80434783 0.82608696 0.81395349 0.8 0.73076923
|
|
0.82222222 0.80952381 0.84782609 0.75555556]
|
|
|
|
mean value: 0.8010285176008128
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
|
|
key: test_recall
|
|
value: [0.8 0.8 nan 0.6 0.8 0.4 0.6 0.6 0.8 0.4]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.8 0.82222222 0.84444444 0.77777778 0.88888889 0.84444444
|
|
0.82222222 0.75555556 0.86666667 0.75555556]
|
|
|
|
mean value: 0.8177777777777777
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.8 nan 0.7 0.7 0.5 0.7 0.5 0.4 0.5]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.8 0.81111111 0.83333333 0.8 0.83333333 0.76666667
|
|
0.82222222 0.78888889 0.85555556 0.75555556]
|
|
|
|
mean value: 0.8066666666666666
|
|
|
|
key: test_jcc
|
|
value: [0.8 0.66666667 nan 0.5 0.57142857 0.28571429
|
|
0.5 0.375 0.4 0.28571429]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.66666667 0.68518519 0.71698113 0.66037736 0.72727273 0.6440678
|
|
0.69811321 0.64150943 0.75 0.60714286]
|
|
|
|
mean value: 0.6797316364953078
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0089519 0.0091002 0.00914049 0.00959015 0.01245856 0.01036382
|
|
0.01422167 0.01016212 0.01498318 0.01038194]
|
|
|
|
mean value: 0.010935401916503907
|
|
|
|
key: score_time
|
|
value: [0.00858164 0.00850725 0.00424981 0.01098704 0.01503611 0.00963306
|
|
0.01035309 0.00957203 0.01383162 0.0097847 ]
|
|
|
|
mean value: 0.010053634643554688
|
|
|
|
key: test_mcc
|
|
value: [0.40824829 0.6 nan 0.40824829 0.65465367 1.
|
|
0.40824829 0.21821789 0.21821789 0.65465367]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.86666667 0.88910845 0.91201231 0.80178373 0.8675239 0.86666667
|
|
0.91201231 0.84632727 0.8675239 0.80178373]
|
|
|
|
mean value: 0.8631408926213697
|
|
|
|
key: test_accuracy
|
|
value: [0.7 0.8 nan 0.7 0.8 1. 0.7 0.6 0.6 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.93333333 0.94444444 0.95555556 0.9 0.93333333 0.93333333
|
|
0.95555556 0.92222222 0.93333333 0.9 ]
|
|
|
|
mean value: 0.9311111111111111
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.8 nan 0.66666667 0.83333333 1.
|
|
0.66666667 0.66666667 0.66666667 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.93333333 0.94382022 0.95454545 0.89655172 0.93181818 0.93333333
|
|
0.95652174 0.91954023 0.93181818 0.89655172]
|
|
|
|
mean value: 0.929783412685894
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.8 nan 0.75 0.71428571 1.
|
|
0.75 0.57142857 0.57142857 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.93333333 0.95454545 0.97674419 0.92857143 0.95348837 0.93333333
|
|
0.93617021 0.95238095 0.95348837 0.92857143]
|
|
|
|
mean value: 0.9450627073734447
|
|
|
|
key: test_recall
|
|
value: [0.8 0.8 nan 0.6 1. 1. 0.6 0.8 0.8 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.93333333 0.93333333 0.93333333 0.86666667 0.91111111 0.93333333
|
|
0.97777778 0.88888889 0.91111111 0.86666667]
|
|
|
|
mean value: 0.9155555555555556
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 0.8 nan 0.7 0.8 1. 0.7 0.6 0.6 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.93333333 0.94444444 0.95555556 0.9 0.93333333 0.93333333
|
|
0.95555556 0.92222222 0.93333333 0.9 ]
|
|
|
|
mean value: 0.9311111111111111
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.66666667 nan 0.5 0.71428571 1.
|
|
0.5 0.5 0.5 0.6 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.875 0.89361702 0.91304348 0.8125 0.87234043 0.875
|
|
0.91666667 0.85106383 0.87234043 0.8125 ]
|
|
|
|
mean value: 0.8694071847055196
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.44935966 0.68770981 0.63709402 0.90934205 1.31588888 0.48195362
|
|
0.40868592 0.56449747 0.35664296 0.61879086]
|
|
|
|
mean value: 0.6429965257644653
|
|
|
|
key: score_time
|
|
value: [0.01234674 0.01238608 0.00677371 0.01211309 0.01210904 0.01201606
|
|
0.01210451 0.01206374 0.01207805 0.0169487 ]
|
|
|
|
mean value: 0.01209397315979004
|
|
|
|
key: test_mcc
|
|
value: [0.6 0.81649658 nan 0.2 0.81649658 0.81649658
|
|
0.40824829 0.65465367 0.21821789 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.9 nan 0.6 0.9 0.9 0.7 0.8 0.6 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.90909091 nan 0.6 0.90909091 0.88888889
|
|
0.66666667 0.83333333 0.66666667 0.90909091]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.8 0.83333333 nan 0.6 0.83333333 1.
|
|
0.75 0.71428571 0.57142857 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8 1. nan 0.6 1. 0.8 0.6 1. 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.9 nan 0.6 0.9 0.9 0.7 0.8 0.6 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.83333333 nan 0.42857143 0.83333333 0.8
|
|
0.5 0.71428571 0.5 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01760101 0.01270008 0.010602 0.01664591 0.01388907 0.0104475
|
|
0.01092863 0.01405859 0.01037669 0.01104903]
|
|
|
|
mean value: 0.012829852104187012
|
|
|
|
key: score_time
|
|
value: [0.01295376 0.00925541 0.00528264 0.01082516 0.01204586 0.00963569
|
|
0.01535797 0.00973344 0.00954556 0.00905228]
|
|
|
|
mean value: 0.010368776321411134
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.81649658 nan 0.81649658 1. 0.81649658
|
|
1. 0.21821789 0.65465367 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.9 nan 0.9 1. 0.9 1. 0.6 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.90909091 nan 0.88888889 1. 0.88888889
|
|
1. 0.66666667 0.83333333 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.83333333 nan 1. 1. 1.
|
|
1. 0.57142857 0.71428571 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. nan 0.8 1. 0.8 1. 0.8 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.9 nan 0.9 1. 0.9 1. 0.6 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.83333333 nan 0.8 1. 0.8
|
|
1. 0.5 0.71428571 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.89
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09140396 0.11081243 0.1061058 0.1074965 0.10002065 0.08645439
|
|
0.08668065 0.08711362 0.08664465 0.08567667]
|
|
|
|
mean value: 0.09484093189239502
|
|
|
|
key: score_time
|
|
value: [0.01851869 0.01910639 0.00513697 0.01879001 0.02097702 0.01896
|
|
0.01881933 0.01879549 0.01810431 0.01854086]
|
|
|
|
mean value: 0.01757490634918213
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.6 nan 0.6 0.81649658 0.81649658
|
|
0.40824829 0.40824829 0.40824829 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.8 nan 0.8 0.9 0.9 0.7 0.7 0.7 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 0.8 nan 0.8 0.90909091 0.88888889
|
|
0.66666667 0.72727273 0.72727273 0.88888889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.8 nan 0.8 0.83333333 1.
|
|
0.75 0.66666667 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8 0.8 nan 0.8 1. 0.8 0.6 0.8 0.8 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.8 nan 0.8 0.9 0.9 0.7 0.7 0.7 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.8 0.66666667 nan 0.66666667 0.83333333 0.8
|
|
0.5 0.57142857 0.57142857 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.49
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00981331 0.00926542 0.00961494 0.00931954 0.00935292 0.00946355
|
|
0.00947499 0.00947022 0.00934291 0.00956202]
|
|
|
|
mean value: 0.009467983245849609
|
|
|
|
key: score_time
|
|
value: [0.00965023 0.00959539 0.00480247 0.00923729 0.0094893 0.00876379
|
|
0.00931597 0.00915647 0.00923562 0.00858641]
|
|
|
|
mean value: 0.008783292770385743
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 0. nan 0.81649658 0.81649658 0.81649658
|
|
0.2 0.5 0.6 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.5 nan 0.9 0.9 0.9 0.6 0.7 0.8 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.61538462 nan 0.88888889 0.90909091 0.90909091
|
|
0.6 0.76923077 0.8 0.88888889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.5 nan 1. 0.83333333 0.83333333
|
|
0.6 0.625 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.8 nan 0.8 1. 1. 0.6 1. 0.8 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.5 nan 0.9 0.9 0.9 0.6 0.7 0.8 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.44444444 nan 0.8 0.83333333 0.83333333
|
|
0.42857143 0.625 0.66666667 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.03
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.050071 1.07899761 1.01138568 1.02257371 1.02732587 1.01924753
|
|
1.05872726 1.03712177 1.03975534 1.04403877]
|
|
|
|
mean value: 1.0389244556427002
|
|
|
|
key: score_time
|
|
value: [0.08884025 0.0888741 0.00447512 0.09609246 0.08854914 0.09480143
|
|
0.10091877 0.09175038 0.0905838 0.08938766]
|
|
|
|
mean value: 0.0834273099899292
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.6 nan 0.81649658 1. 0.81649658
|
|
0.65465367 0.40824829 0.40824829 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.8 nan 0.9 1. 0.9 0.8 0.7 0.7 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.8 nan 0.88888889 1. 0.88888889
|
|
0.75 0.72727273 0.72727273 0.88888889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.8 nan 1. 1. 1.
|
|
1. 0.66666667 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.8 nan 0.8 1. 0.8 0.6 0.8 0.8 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.8 nan 0.9 1. 0.9 0.8 0.7 0.7 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.66666667 nan 0.8 1. 0.8
|
|
0.6 0.57142857 0.57142857 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.86844516 0.87865114 0.92193866 0.84994102 0.85754681 0.82041979
|
|
0.87674642 0.89583445 0.9129231 0.9018712 ]
|
|
|
|
mean value: 0.87843177318573
|
|
|
|
key: score_time
|
|
value: [0.17786026 0.21679783 0.00455117 0.22086644 0.21914244 0.20099974
|
|
0.23634052 0.21746397 0.24935365 0.14481735]
|
|
|
|
mean value: 0.1888193368911743
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.6 nan 0.6 1. 1.
|
|
0.40824829 0.40824829 0.6 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.97801929 0.95555556 0.91201231 0.93356387 0.97801929 0.95555556
|
|
0.95555556 0.95555556 0.97801929 0.93356387]
|
|
|
|
mean value: 0.9535420155810868
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.8 nan 0.8 1. 1. 0.7 0.7 0.8 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.98888889 0.97777778 0.95555556 0.96666667 0.98888889 0.97777778
|
|
0.97777778 0.97777778 0.98888889 0.96666667]
|
|
|
|
mean value: 0.9766666666666667
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.8 nan 0.8 1. 1.
|
|
0.66666667 0.72727273 0.8 0.88888889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.98876404 0.97777778 0.95454545 0.96629213 0.98901099 0.97777778
|
|
0.97777778 0.97777778 0.98876404 0.96629213]
|
|
|
|
mean value: 0.9764779914218117
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.8 nan 0.8 1. 1.
|
|
0.75 0.66666667 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 0.97777778 0.97674419 0.97727273 0.97826087 0.97777778
|
|
0.97777778 0.97777778 1. 0.97727273]
|
|
|
|
mean value: 0.9820661621268294
|
|
|
|
key: test_recall
|
|
value: [1. 0.8 nan 0.8 1. 1. 0.6 0.8 0.8 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.97777778 0.97777778 0.93333333 0.95555556 1. 0.97777778
|
|
0.97777778 0.97777778 0.97777778 0.95555556]
|
|
|
|
mean value: 0.9711111111111111
|
|
|
|
key: test_roc_auc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[0.9 0.8 nan 0.8 1. 1. 0.7 0.7 0.8 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.98888889 0.97777778 0.95555556 0.96666667 0.98888889 0.97777778
|
|
0.97777778 0.97777778 0.98888889 0.96666667]
|
|
|
|
mean value: 0.9766666666666667
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.66666667 nan 0.66666667 1. 1.
|
|
0.5 0.57142857 0.66666667 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.97777778 0.95652174 0.91304348 0.93478261 0.97826087 0.95652174
|
|
0.95652174 0.95652174 0.97777778 0.93478261]
|
|
|
|
mean value: 0.9542512077294686
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02026916 0.00899649 0.00874615 0.00913286 0.00929546 0.00914478
|
|
0.00965142 0.00860023 0.00842547 0.00850058]
|
|
|
|
mean value: 0.010076260566711426
|
|
|
|
key: score_time
|
|
value: [0.00897002 0.00989938 0.0045855 0.00872135 0.00897264 0.00869513
|
|
0.00925016 0.00857472 0.00852871 0.00845027]
|
|
|
|
mean value: 0.008464789390563965
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 0.21821789 nan 0. 0.40824829 0.65465367
|
|
0.21821789 0. 0.40824829 0.40824829]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.57906602 0.73624773 0.57906602 0.60540551 0.60238451 0.56056066
|
|
0.64700558 0.67082039 0.62609903 0.64700558]
|
|
|
|
mean value: 0.6253661066190971
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.6 nan 0.5 0.7 0.8 0.6 0.5 0.7 0.7]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.78888889 0.86666667 0.78888889 0.8 0.8 0.77777778
|
|
0.82222222 0.83333333 0.81111111 0.82222222]
|
|
|
|
mean value: 0.8111111111111111
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.66666667 nan 0.54545455 0.72727273 0.75
|
|
0.5 0.44444444 0.66666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.7816092 0.86046512 0.7816092 0.78571429 0.79069767 0.76190476
|
|
0.81395349 0.82352941 0.8 0.81395349]
|
|
|
|
mean value: 0.8013436617630212
|
|
|
|
key: test_precision
|
|
value: [1. 0.57142857 nan 0.5 0.66666667 1.
|
|
0.66666667 0.5 0.75 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.80952381 0.90243902 0.80952381 0.84615385 0.82926829 0.82051282
|
|
0.85365854 0.875 0.85 0.85365854]
|
|
|
|
mean value: 0.8449738675958188
|
|
|
|
key: test_recall
|
|
value: [0.6 0.8 nan 0.6 0.8 0.6 0.4 0.4 0.6 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.75555556 0.82222222 0.75555556 0.73333333 0.75555556 0.71111111
|
|
0.77777778 0.77777778 0.75555556 0.77777778]
|
|
|
|
mean value: 0.7622222222222222
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.6 nan 0.5 0.7 0.8 0.6 0.5 0.7 0.7]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.78888889 0.86666667 0.78888889 0.8 0.8 0.77777778
|
|
0.82222222 0.83333333 0.81111111 0.82222222]
|
|
|
|
mean value: 0.8111111111111111
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.5 nan 0.375 0.57142857 0.6
|
|
0.33333333 0.28571429 0.5 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.64150943 0.75510204 0.64150943 0.64705882 0.65384615 0.61538462
|
|
0.68627451 0.7 0.66666667 0.68627451]
|
|
|
|
mean value: 0.6693626187775545
|
|
|
|
MCC on Blind test: 0.18
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.1605041 0.03366351 0.0360558 0.05030107 0.04023385 0.03848863
|
|
0.09582305 0.07147694 0.0326159 0.06000638]
|
|
|
|
mean value: 0.06191692352294922
|
|
|
|
key: score_time
|
|
value: [0.01092339 0.01055479 0.00469685 0.01059723 0.01124191 0.01426959
|
|
0.01316428 0.01329756 0.01405334 0.0105629 ]
|
|
|
|
mean value: 0.011336183547973633
|
|
|
|
key: test_mcc
|
|
value: [1. 0.81649658 nan 0.81649658 1. 0.81649658
|
|
0.81649658 0.6 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.9 nan 0.9 1. 0.9 0.9 0.8 0.7 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.90909091 nan 0.88888889 1. 0.90909091
|
|
0.88888889 0.8 0.76923077 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.83333333 nan 1. 1. 0.83333333
|
|
1. 0.8 0.625 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. nan 0.8 1. 1. 0.8 0.8 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.9 nan 0.9 1. 0.9 0.9 0.8 0.7 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.83333333 nan 0.8 1. 0.83333333
|
|
0.8 0.66666667 0.625 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.84
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04766369 0.03517723 0.03765726 0.03865457 0.03713489 0.0370903
|
|
0.03802204 0.03782034 0.03993273 0.03703785]
|
|
|
|
mean value: 0.03861908912658692
|
|
|
|
key: score_time
|
|
value: [0.02126861 0.02220416 0.01126933 0.02398229 0.02256036 0.02334976
|
|
0.02145195 0.02393079 0.02469134 0.02361798]
|
|
|
|
mean value: 0.021832656860351563
|
|
|
|
key: test_mcc
|
|
value: [0.40824829 0.81649658 nan 0.40824829 1. 0.65465367
|
|
0.81649658 0.65465367 0.21821789 0.6 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 0.97801929 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9978019293843652
|
|
|
|
key: test_accuracy
|
|
value: [0.7 0.9 nan 0.7 1. 0.8 0.9 0.8 0.6 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.98888889 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9988888888888889
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.88888889 nan 0.72727273 1. 0.83333333
|
|
0.90909091 0.83333333 0.66666667 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 0.98901099 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9989010989010989
|
|
|
|
key: test_precision
|
|
value: [0.66666667 1. nan 0.66666667 1. 0.71428571
|
|
0.83333333 0.71428571 0.57142857 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 0.97826087 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9978260869565218
|
|
|
|
key: test_recall
|
|
value: [0.8 0.8 nan 0.8 1. 1. 1. 1. 0.8 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 0.9 nan 0.7 1. 0.8 0.9 0.8 0.6 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.98888889 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9988888888888889
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.8 nan 0.57142857 1. 0.71428571
|
|
0.83333333 0.71428571 0.5 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 0.97826087 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9978260869565218
|
|
|
|
MCC on Blind test: -0.1
|
|
|
|
Accuracy on Blind test: 0.48
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01478887 0.00884414 0.00876284 0.00883818 0.00909948 0.00853968
|
|
0.00871682 0.00872874 0.00913668 0.00861716]
|
|
|
|
mean value: 0.009407258033752442
|
|
|
|
key: score_time
|
|
value: [0.008955 0.00877619 0.00435042 0.00857329 0.00864887 0.0086658
|
|
0.00855088 0.00852728 0.00936174 0.00867295]
|
|
|
|
mean value: 0.00830824375152588
|
|
|
|
key: test_mcc
|
|
value: [0.21821789 0.40824829 nan 0.2 0.81649658 1.
|
|
0.21821789 0. 0.40824829 0.21821789]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.64508188 0.53452248 0.55776344 0.55776344 0.53452248 0.51161666
|
|
0.57906602 0.60238451 0.62609903 0.53452248]
|
|
|
|
mean value: 0.5683342428673076
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.7 nan 0.6 0.9 1. 0.6 0.5 0.7 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.82222222 0.76666667 0.77777778 0.77777778 0.76666667 0.75555556
|
|
0.78888889 0.8 0.81111111 0.76666667]
|
|
|
|
mean value: 0.7833333333333333
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.72727273 nan 0.6 0.90909091 1.
|
|
0.5 0.44444444 0.72727273 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.81818182 0.75862069 0.76744186 0.76744186 0.75862069 0.75
|
|
0.7816092 0.79069767 0.8 0.75862069]
|
|
|
|
mean value: 0.7751234477898471
|
|
|
|
key: test_precision
|
|
value: [0.57142857 0.66666667 nan 0.6 0.83333333 1.
|
|
0.66666667 0.5 0.66666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.8372093 0.78571429 0.80487805 0.80487805 0.78571429 0.76744186
|
|
0.80952381 0.82926829 0.85 0.78571429]
|
|
|
|
mean value: 0.8060342219701266
|
|
|
|
key: test_recall
|
|
value: [0.8 0.8 nan 0.6 1. 1. 0.4 0.4 0.8 0.4]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[0.8 0.73333333 0.73333333 0.73333333 0.73333333 0.73333333
|
|
0.75555556 0.75555556 0.75555556 0.73333333]
|
|
|
|
mean value: 0.7466666666666666
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 0.7 nan 0.6 0.9 1. 0.6 0.5 0.7 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.82222222 0.76666667 0.77777778 0.77777778 0.76666667 0.75555556
|
|
0.78888889 0.8 0.81111111 0.76666667]
|
|
|
|
mean value: 0.7833333333333333
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.57142857 nan 0.42857143 0.83333333 1.
|
|
0.33333333 0.28571429 0.57142857 0.33333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.69230769 0.61111111 0.62264151 0.62264151 0.61111111 0.6
|
|
0.64150943 0.65384615 0.66666667 0.61111111]
|
|
|
|
mean value: 0.6332946298984035
|
|
|
|
MCC on Blind test: 0.05
|
|
|
|
Accuracy on Blind test: 0.57
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01021981 0.01365471 0.01286817 0.01448369 0.01383781 0.01418757
|
|
0.01293206 0.01413035 0.0132041 0.01391315]
|
|
|
|
mean value: 0.013343143463134765
|
|
|
|
key: score_time
|
|
value: [0.00872874 0.01148295 0.00617766 0.01167202 0.01157427 0.01153183
|
|
0.01140785 0.01148415 0.01151109 0.01141477]
|
|
|
|
mean value: 0.010698533058166504
|
|
|
|
key: test_mcc
|
|
value: [0.40824829 0.5 nan 0.40824829 0.81649658 0.81649658
|
|
0.65465367 0.81649658 0.21821789 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.93356387 0.67202151 0.88910845 0.97801929 0.97801929 0.95650071
|
|
0.95650071 0.81649658 0.95650071 0.87447463]
|
|
|
|
mean value: 0.9011205769973302
|
|
|
|
key: test_accuracy
|
|
value: [0.7 0.7 nan 0.7 0.9 0.9 0.8 0.9 0.6 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.96666667 0.81111111 0.94444444 0.98888889 0.98888889 0.97777778
|
|
0.97777778 0.9 0.97777778 0.93333333]
|
|
|
|
mean value: 0.9466666666666667
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.76923077 nan 0.66666667 0.90909091 0.88888889
|
|
0.75 0.88888889 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.96629213 0.8411215 0.94382022 0.98876404 0.98901099 0.97727273
|
|
0.97727273 0.88888889 0.97826087 0.92857143]
|
|
|
|
mean value: 0.9479275530403464
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.625 nan 0.75 0.83333333 1.
|
|
1. 1. 0.57142857 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.97727273 0.72580645 0.95454545 1. 0.97826087 1.
|
|
1. 1. 0.95744681 1. ]
|
|
|
|
mean value: 0.9593332311506941
|
|
|
|
key: test_recall
|
|
value: [0.8 1. nan 0.6 1. 0.8 0.6 0.8 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.95555556 1. 0.93333333 0.97777778 1. 0.95555556
|
|
0.95555556 0.8 1. 0.86666667]
|
|
|
|
mean value: 0.9444444444444444
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 0.7 nan 0.7 0.9 0.9 0.8 0.9 0.6 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.96666667 0.81111111 0.94444444 0.98888889 0.98888889 0.97777778
|
|
0.97777778 0.9 0.97777778 0.93333333]
|
|
|
|
mean value: 0.9466666666666668
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.625 nan 0.5 0.83333333 0.8
|
|
0.6 0.8 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.93478261 0.72580645 0.89361702 0.97777778 0.97826087 0.95555556
|
|
0.95555556 0.8 0.95744681 0.86666667]
|
|
|
|
mean value: 0.9045469315216562
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01397181 0.01273942 0.01281476 0.01423192 0.0125308 0.01231194
|
|
0.01235962 0.01259017 0.01245999 0.01275468]
|
|
|
|
mean value: 0.012876510620117188
|
|
|
|
key: score_time
|
|
value: [0.01007605 0.01145029 0.00625587 0.01167154 0.01142955 0.01144981
|
|
0.01150846 0.01144075 0.01146603 0.01146483]
|
|
|
|
mean value: 0.010821318626403809
|
|
|
|
key: test_mcc
|
|
value: [ 1. 0.40824829 nan -0.33333333 0.81649658 1.
|
|
0.65465367 0.81649658 0.21821789 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.91111111 0.93356387 0.93541435 0.33333333 0.95650071 0.91111111
|
|
0.72486118 0.91473203 1. 0.97801929]
|
|
|
|
mean value: 0.8598646994997842
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.7 nan 0.4 0.9 1. 0.8 0.9 0.6 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.95555556 0.96666667 0.96666667 0.6 0.97777778 0.95555556
|
|
0.84444444 0.95555556 1. 0.98888889]
|
|
|
|
mean value: 0.9211111111111111
|
|
|
|
key: test_fscore
|
|
value: [1. 0.72727273 nan 0. 0.90909091 1.
|
|
0.75 0.90909091 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.95555556 0.96629213 0.96551724 0.33333333 0.97826087 0.95555556
|
|
0.81578947 0.95348837 1. 0.98876404]
|
|
|
|
mean value: 0.8912556580941488
|
|
|
|
key: test_precision
|
|
value: [1. 0.66666667 nan 0. 0.83333333 1.
|
|
1. 0.83333333 0.57142857 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.95555556 0.97727273 1. 1. 0.95744681 0.95555556
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9845830646894477
|
|
|
|
key: test_recall
|
|
value: [1. 0.8 nan 0. 1. 1. 0.6 1. 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.95555556 0.95555556 0.93333333 0.2 1. 0.95555556
|
|
0.68888889 0.91111111 1. 0.97777778]
|
|
|
|
mean value: 0.8577777777777778
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.7 nan 0.4 0.9 1. 0.8 0.9 0.6 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.95555556 0.96666667 0.96666667 0.6 0.97777778 0.95555556
|
|
0.84444444 0.95555556 1. 0.98888889]
|
|
|
|
mean value: 0.9211111111111112
|
|
|
|
key: test_jcc
|
|
value: [1. 0.57142857 nan 0. 0.83333333 1.
|
|
0.6 0.83333333 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.91489362 0.93478261 0.93333333 0.2 0.95744681 0.91489362
|
|
0.68888889 0.91111111 1. 0.97777778]
|
|
|
|
mean value: 0.8433127762359954
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09748411 0.08118486 0.08655453 0.08261776 0.08569789 0.08832264
|
|
0.08770275 0.0874753 0.09288096 0.08781147]
|
|
|
|
mean value: 0.08777322769165039
|
|
|
|
key: score_time
|
|
value: [0.01493359 0.01460385 0.0050025 0.01509309 0.01566195 0.01640439
|
|
0.01560545 0.01591015 0.01580977 0.01584244]
|
|
|
|
mean value: 0.01448671817779541
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.6 nan 0.81649658 1. 0.81649658
|
|
0.81649658 0.40824829 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.8 nan 0.9 1. 0.9 0.9 0.7 0.7 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.8 nan 0.88888889 1. 0.90909091
|
|
0.88888889 0.72727273 0.76923077 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.8 nan 1. 1. 0.83333333
|
|
1. 0.66666667 0.625 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.8 nan 0.8 1. 1. 0.8 0.8 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.8 nan 0.9 1. 0.9 0.9 0.7 0.7 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.66666667 nan 0.8 1. 0.83333333
|
|
0.8 0.57142857 0.625 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.78
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03620768 0.03868723 0.02701831 0.0291822 0.0329349 0.03723025
|
|
0.05369878 0.03093934 0.03043032 0.04312468]
|
|
|
|
mean value: 0.03594536781311035
|
|
|
|
key: score_time
|
|
value: [0.01804686 0.01641774 0.00487351 0.02259183 0.02211642 0.03795409
|
|
0.03202295 0.02688098 0.03348565 0.03866339]
|
|
|
|
mean value: 0.025305342674255372
|
|
|
|
key: test_mcc
|
|
value: [1. 0.81649658 nan 0.81649658 1. 0.81649658
|
|
0.81649658 0.40824829 0.65465367 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.9 nan 0.9 1. 0.9 0.9 0.7 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.90909091 nan 0.88888889 1. 0.90909091
|
|
0.88888889 0.72727273 0.83333333 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.83333333 nan 1. 1. 0.83333333
|
|
1. 0.66666667 0.71428571 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. nan 0.8 1. 1. 0.8 0.8 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.9 nan 0.9 1. 0.9 0.9 0.7 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.83333333 nan 0.8 1. 0.83333333
|
|
0.8 0.57142857 0.71428571 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.95
|
|
|
|
Accuracy on Blind test: 0.98
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01397204 0.02746344 0.01704359 0.01630187 0.01670456 0.01774883
|
|
0.01657462 0.01662517 0.03054595 0.01816249]
|
|
|
|
mean value: 0.019114255905151367
|
|
|
|
key: score_time
|
|
value: [0.01164699 0.01156616 0.00662804 0.01197863 0.01218772 0.01199245
|
|
0.01217413 0.01204348 0.02131319 0.02032018]
|
|
|
|
mean value: 0.01318509578704834
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.6 nan 0.2 0.65465367 0.81649658
|
|
0.2 0.5 0. 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.8 nan 0.6 0.8 0.9 0.6 0.7 0.5 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 0.8 nan 0.6 0.83333333 0.88888889
|
|
0.6 0.76923077 0.61538462 0.88888889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.8 nan 0.6 0.71428571 1.
|
|
0.6 0.625 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8 0.8 nan 0.6 1. 0.8 0.6 1. 0.8 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.8 nan 0.6 0.8 0.9 0.6 0.7 0.5 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.8 0.66666667 nan 0.42857143 0.71428571 0.8
|
|
0.42857143 0.625 0.44444444 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.01
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[0.20379448 0.20308447 0.20512509 0.20450664 0.19911671 0.19699526
|
|
0.18761802 0.18994236 0.19499803 0.18119597]
|
|
|
|
mean value: 0.19663770198822023
|
|
|
|
key: score_time
|
|
value: [0.00957632 0.01017761 0.00509214 0.01043415 0.01005363 0.00927019
|
|
0.01004076 0.00947762 0.01011539 0.01008415]
|
|
|
|
mean value: 0.009432196617126465
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.81649658 nan 0.6 1. 0.81649658
|
|
0.81649658 0.40824829 0.65465367 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.9 nan 0.8 1. 0.9 0.9 0.7 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 0.90909091 nan 0.8 1. 0.90909091
|
|
0.88888889 0.72727273 0.83333333 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.83333333 nan 0.8 1. 0.83333333
|
|
1. 0.66666667 0.71428571 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8 1. nan 0.8 1. 1. 0.8 0.8 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.9 nan 0.8 1. 0.9 0.9 0.7 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.8 0.83333333 nan 0.66666667 1. 0.83333333
|
|
0.8 0.57142857 0.71428571 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.84
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01176 0.01463223 0.01451039 0.01862359 0.0145371 0.01461315
|
|
0.01455307 0.0146296 0.01553369 0.01768708]
|
|
|
|
mean value: 0.015107989311218262
|
|
|
|
key: score_time
|
|
value: [0.01168466 0.01204658 0.00626183 0.01207185 0.01193285 0.01204848
|
|
0.01547551 0.01594973 0.0170939 0.01871085]
|
|
|
|
mean value: 0.013327622413635254
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.81649658 nan 0.5 1. 0.81649658
|
|
0.65465367 0.5 0.81649658 0.65465367]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.9 nan 0.7 1. 0.9 0.8 0.7 0.9 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 0.88888889 nan 0.57142857 1. 0.88888889
|
|
0.75 0.57142857 0.88888889 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [ 1. 1. nan 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8 0.8 nan 0.4 1. 0.8 0.6 0.4 0.8 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.9 nan 0.7 1. 0.9 0.8 0.7 0.9 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.8 0.8 nan 0.4 1. 0.8 0.6 0.4 0.8 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02705026 0.01304579 0.01301503 0.03505015 0.03305626 0.02976966
|
|
0.03926349 0.02756858 0.0333178 0.03212309]
|
|
|
|
mean value: 0.028326010704040526
|
|
|
|
key: score_time
|
|
value: [0.01200557 0.01179862 0.00627947 0.02354193 0.02322149 0.02598286
|
|
0.02078986 0.02019954 0.02314901 0.02163339]
|
|
|
|
mean value: 0.018860173225402833
|
|
|
|
key: test_mcc
|
|
value: [0.40824829 0.65465367 nan 0.2 0.81649658 1.
|
|
0.40824829 0.81649658 0.21821789 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.97801929 0.97801929 0.97801929 0.97801929 0.97801929 0.97801929
|
|
0.97801929 0.97801929 1. 0.97801929]
|
|
|
|
mean value: 0.9802173644592863
|
|
|
|
key: test_accuracy
|
|
value: [0.7 0.8 nan 0.6 0.9 1. 0.7 0.9 0.6 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.98888889 0.98888889 0.98888889 0.98888889 0.98888889 0.98888889
|
|
0.98888889 0.98888889 1. 0.98888889]
|
|
|
|
mean value: 0.99
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.83333333 nan 0.6 0.90909091 1.
|
|
0.66666667 0.90909091 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.98901099 0.98901099 0.98901099 0.98901099 0.98901099 0.98901099
|
|
0.98901099 0.98901099 1. 0.98901099]
|
|
|
|
mean value: 0.9901098901098901
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.71428571 nan 0.6 0.83333333 1.
|
|
0.75 0.83333333 0.57142857 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.97826087 0.97826087 0.97826087 0.97826087 0.97826087 0.97826087
|
|
0.97826087 0.97826087 1. 0.97826087]
|
|
|
|
mean value: 0.9804347826086957
|
|
|
|
key: test_recall
|
|
value: [0.8 1. nan 0.6 1. 1. 0.6 1. 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 0.8 nan 0.6 0.9 1. 0.7 0.9 0.6 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.98888889 0.98888889 0.98888889 0.98888889 0.98888889 0.98888889
|
|
0.98888889 0.98888889 1. 0.98888889]
|
|
|
|
mean value: 0.99
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.71428571 nan 0.42857143 0.83333333 1.
|
|
0.5 0.83333333 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.97826087 0.97826087 0.97826087 0.97826087 0.97826087 0.97826087
|
|
0.97826087 0.97826087 1. 0.97826087]
|
|
|
|
mean value: 0.9804347826086957
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:156: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:159: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.11854625 0.19753456 0.18856549 0.10754013 0.16717076 0.19964838
|
|
0.11414194 0.19587135 0.26366663 0.21352744]
|
|
|
|
mean value: 0.17662129402160645
|
|
|
|
key: score_time
|
|
value: [0.02023578 0.02330852 0.01283813 0.01211834 0.02970982 0.02259326
|
|
0.01234365 0.02518892 0.02464271 0.02279258]
|
|
|
|
mean value: 0.020577168464660643
|
|
|
|
key: test_mcc
|
|
value: [0.40824829 0.65465367 nan 0.2 0.81649658 0.65465367
|
|
0.40824829 0.81649658 0.21821789 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.97801929 0.97801929 0.97801929 0.97801929 0.97801929 1.
|
|
0.97801929 0.97801929 1. 0.97801929]
|
|
|
|
mean value: 0.9824154350749212
|
|
|
|
key: test_accuracy
|
|
value: [0.7 0.8 nan 0.6 0.9 0.8 0.7 0.9 0.6 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.98888889 0.98888889 0.98888889 0.98888889 0.98888889 1.
|
|
0.98888889 0.98888889 1. 0.98888889]
|
|
|
|
mean value: 0.9911111111111112
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.83333333 nan 0.6 0.90909091 0.83333333
|
|
0.66666667 0.90909091 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.98901099 0.98901099 0.98901099 0.98901099 0.98901099 1.
|
|
0.98901099 0.98901099 1. 0.98901099]
|
|
|
|
mean value: 0.9912087912087912
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.71428571 nan 0.6 0.83333333 0.71428571
|
|
0.75 0.83333333 0.57142857 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.97826087 0.97826087 0.97826087 0.97826087 0.97826087 1.
|
|
0.97826087 0.97826087 1. 0.97826087]
|
|
|
|
mean value: 0.9826086956521739
|
|
|
|
key: test_recall
|
|
value: [0.8 1. nan 0.6 1. 1. 0.6 1. 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 0.8 nan 0.6 0.9 0.8 0.7 0.9 0.6 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.98888889 0.98888889 0.98888889 0.98888889 0.98888889 1.
|
|
0.98888889 0.98888889 1. 0.98888889]
|
|
|
|
mean value: 0.991111111111111
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.71428571 nan 0.42857143 0.83333333 0.71428571
|
|
0.5 0.83333333 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.97826087 0.97826087 0.97826087 0.97826087 0.97826087 1.
|
|
0.97826087 0.97826087 1. 0.97826087]
|
|
|
|
mean value: 0.9826086956521739
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02091384 0.02012491 0.02000952 0.02053618 0.02216005 0.02067947
|
|
0.03914547 0.03124046 0.02153111 0.02034235]
|
|
|
|
mean value: 0.023668336868286132
|
|
|
|
key: score_time
|
|
value: [0.01170325 0.01167917 0.01161551 0.01167727 0.01176381 0.00625944
|
|
0.00652528 0.01202559 0.01162553 0.011724 ]
|
|
|
|
mean value: 0.010659885406494141
|
|
|
|
key: test_mcc
|
|
value: [ 0.33333333 0.70710678 0. -0.70710678 0. nan
|
|
nan 1. 1. 0.61237244]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.9258201 0.92307692 0.9258201 1. 1. 0.96225045
|
|
0.9258201 0.96225045 0.96291111 0.96296296]
|
|
|
|
mean value: 0.9550912190805847
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.83333333 0.5 0.16666667 0.5 nan
|
|
nan 1. 1. 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.96153846 0.96153846 0.96153846 1. 1. 0.98076923
|
|
0.96153846 0.98076923 0.98113208 0.98113208]
|
|
|
|
mean value: 0.9769956458635704
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.85714286 0.66666667 0.28571429 0.4 nan
|
|
nan 1. 1. 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.96 0.96153846 0.96296296 1. 1. 0.98113208
|
|
0.96296296 0.98113208 0.98181818 0.98113208]
|
|
|
|
mean value: 0.9772678795697664
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.75 0.5 0.25 0.5 nan
|
|
nan 1. 1. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 0.96153846 0.92857143 1. 1. 0.96296296
|
|
0.92857143 0.96296296 0.96428571 0.96296296]
|
|
|
|
mean value: 0.9671855921855922
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
|
|
key: test_recall
|
|
value: [0.66666667 1. 1. 0.33333333 0.33333333 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.92307692 0.96153846 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9884615384615385
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.83333333 0.5 0.16666667 0.5 nan
|
|
nan 1. 1. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.96153846 0.96153846 0.96153846 1. 1. 0.98076923
|
|
0.96153846 0.98076923 0.98076923 0.98148148]
|
|
|
|
mean value: 0.976994301994302
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.75 0.5 0.16666667 0.25 nan
|
|
nan 1. 1. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.92307692 0.92592593 0.92857143 1. 1. 0.96296296
|
|
0.92857143 0.96296296 0.96428571 0.96296296]
|
|
|
|
mean value: 0.9559320309320309
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.32553601 0.34272623 0.32583499 0.36345124 0.3234396 0.33710957
|
|
0.35224915 0.35124803 0.39501452 0.36207604]
|
|
|
|
mean value: 0.347868537902832
|
|
|
|
key: score_time
|
|
value: [0.01237249 0.01205635 0.01210785 0.01214385 0.01202536 0.00631595
|
|
0.00638008 0.01196527 0.01431847 0.01206231]
|
|
|
|
mean value: 0.011174798011779785
|
|
|
|
key: test_mcc
|
|
value: [0. 0.70710678 0. 0. 0. nan
|
|
nan 1. 1. 0.61237244]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.88527041 1. 0.84866842 1. 1. 0.88527041
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9619209250306358
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.83333333 0.5 0.5 0.5 nan
|
|
nan 1. 1. 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.94230769 1. 0.92307692 1. 1. 0.94230769
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9807692307692307
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.85714286 0.66666667 0.57142857 0.4 nan
|
|
nan 1. 1. 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.94117647 1. 0.92592593 1. 1. 0.94339623
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9810498622929256
|
|
|
|
key: test_precision
|
|
value: [0.5 0.75 0.5 0.5 0.5 nan nan 1. 1. 0.75]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.96 1. 0.89285714 1. 1. 0.92592593
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9778783068783069
|
|
|
|
key: test_recall
|
|
value: [0.33333333 1. 1. 0.66666667 0.33333333 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.92307692 1. 0.96153846 1. 1. 0.96153846
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9846153846153847
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.83333333 0.5 0.5 0.5 nan
|
|
nan 1. 1. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.94230769 1. 0.92307692 1. 1. 0.94230769
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9807692307692308
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.75 0.5 0.4 0.25 nan nan 1. 1. 0.75]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.88888889 1. 0.86206897 1. 1. 0.89285714
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9643814997263274
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01288819 0.01199651 0.01104617 0.00844717 0.00849009 0.00838327
|
|
0.00898004 0.00881791 0.00832319 0.0085094 ]
|
|
|
|
mean value: 0.009588193893432618
|
|
|
|
key: score_time
|
|
value: [0.01248598 0.01183987 0.00980401 0.00890875 0.0086236 0.00427198
|
|
0.0042758 0.00965548 0.00923252 0.00854301]
|
|
|
|
mean value: 0.008764100074768067
|
|
|
|
key: test_mcc
|
|
value: [0. 0. 0.4472136 0. 0. nan
|
|
nan 0.33333333 0.61237244 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.70064905 0.65824263 0.81312325 0.66666667 0.76923077 0.5990423
|
|
0.58333333 0.71151247 0.73357097 0.70527596]
|
|
|
|
mean value: 0.6940647388614967
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.5 0.66666667 0.5 0.5 nan
|
|
nan 0.66666667 0.8 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.84615385 0.82692308 0.90384615 0.80769231 0.88461538 0.78846154
|
|
0.76923077 0.84615385 0.8490566 0.8490566 ]
|
|
|
|
mean value: 0.8371190130624093
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.57142857 0.75 0.57142857 0.57142857 nan
|
|
nan 0.66666667 0.66666667 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.83333333 0.81632653 0.89795918 0.76190476 0.88461538 0.81355932
|
|
0.71428571 0.82608696 0.82608696 0.83333333]
|
|
|
|
mean value: 0.8207491476835619
|
|
|
|
key: test_precision
|
|
value: [0.5 0.5 0.6 0.5 0.5 nan
|
|
nan 0.66666667 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.90909091 0.86956522 0.95652174 1. 0.88461538 0.72727273
|
|
0.9375 0.95 1. 0.90909091]
|
|
|
|
mean value: 0.9143656886591669
|
|
|
|
key: test_recall
|
|
value: [0.33333333 0.66666667 1. 0.66666667 0.66666667 nan
|
|
nan 0.66666667 0.5 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.76923077 0.76923077 0.84615385 0.61538462 0.88461538 0.92307692
|
|
0.57692308 0.73076923 0.7037037 0.76923077]
|
|
|
|
mean value: 0.7588319088319089
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.5 0.66666667 0.5 0.5 nan
|
|
nan 0.66666667 0.75 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.84615385 0.82692308 0.90384615 0.80769231 0.88461538 0.78846154
|
|
0.76923077 0.84615385 0.85185185 0.84757835]
|
|
|
|
mean value: 0.8372507122507122
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.4 0.6 0.4 0.4 nan
|
|
nan 0.5 0.5 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.71428571 0.68965517 0.81481481 0.61538462 0.79310345 0.68571429
|
|
0.55555556 0.7037037 0.7037037 0.71428571]
|
|
|
|
mean value: 0.6990206728137762
|
|
|
|
MCC on Blind test: 0.23
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00965214 0.00892282 0.00858331 0.00853586 0.0085423 0.00859547
|
|
0.0098629 0.00870371 0.00837135 0.00841379]
|
|
|
|
mean value: 0.008818364143371582
|
|
|
|
key: score_time
|
|
value: [0.00977111 0.00953913 0.00881982 0.00860023 0.00865912 0.00424123
|
|
0.00450349 0.00848484 0.008569 0.00859237]
|
|
|
|
mean value: 0.007978034019470216
|
|
|
|
key: test_mcc
|
|
value: [ 0.4472136 -0.33333333 0. -0.33333333 0. nan
|
|
nan -0.33333333 0.66666667 0.16666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.77151675 0.63245553 0.84615385 0.82305489 0.80829038 0.77849894
|
|
0.65433031 0.74466871 0.74106548 0.73609205]
|
|
|
|
mean value: 0.7536126885007749
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.33333333 0.5 0.33333333 0.5 nan
|
|
nan 0.33333333 0.8 0.6 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.88461538 0.80769231 0.92307692 0.90384615 0.90384615 0.88461538
|
|
0.82692308 0.86538462 0.86792453 0.86792453]
|
|
|
|
mean value: 0.8735849056603774
|
|
|
|
key: test_fscore
|
|
value: [0.5 0.33333333 0.66666667 0.33333333 0.4 nan
|
|
nan 0.33333333 0.8 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.88 0.7826087 0.92307692 0.89361702 0.90196078 0.875
|
|
0.83018868 0.85106383 0.8627451 0.8627451 ]
|
|
|
|
mean value: 0.8663006129430366
|
|
|
|
key: test_precision
|
|
value: [1. 0.33333333 0.5 0.33333333 0.5 nan
|
|
nan 0.33333333 0.66666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.91666667 0.9 0.92307692 1. 0.92 0.95454545
|
|
0.81481481 0.95238095 0.91666667 0.88 ]
|
|
|
|
mean value: 0.9178151478151478
|
|
|
|
key: test_recall
|
|
value: [0.33333333 0.33333333 1. 0.33333333 0.33333333 nan
|
|
nan 0.33333333 1. 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.84615385 0.69230769 0.92307692 0.80769231 0.88461538 0.80769231
|
|
0.84615385 0.76923077 0.81481481 0.84615385]
|
|
|
|
mean value: 0.8237891737891738
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.33333333 0.5 0.33333333 0.5 nan
|
|
nan 0.33333333 0.83333333 0.58333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.88461538 0.80769231 0.92307692 0.90384615 0.90384615 0.88461538
|
|
0.82692308 0.86538462 0.86894587 0.86752137]
|
|
|
|
mean value: 0.8736467236467237
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 0.2 0.5 0.2 0.25 nan
|
|
nan 0.2 0.66666667 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.78571429 0.64285714 0.85714286 0.80769231 0.82142857 0.77777778
|
|
0.70967742 0.74074074 0.75862069 0.75862069]
|
|
|
|
mean value: 0.7660272482018867
|
|
|
|
MCC on Blind test: -0.03
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00931239 0.0100131 0.00874043 0.00838232 0.009269 0.00925255
|
|
0.00977898 0.00942922 0.00934243 0.00922894]
|
|
|
|
mean value: 0.009274935722351075
|
|
|
|
key: score_time
|
|
value: [0.01010799 0.00981784 0.00952053 0.00997114 0.0101397 0.00483942
|
|
0.00481582 0.01042175 0.01021457 0.01005983]
|
|
|
|
mean value: 0.008990859985351563
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0. 0. -0.70710678 -0.33333333 nan
|
|
nan 0.70710678 0.66666667 0.16666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.54494926 0.4259217 0.54006172 0.62279916 0.66628253 0.58080232
|
|
0.54006172 0.65433031 0.28612567 0.58766552]
|
|
|
|
mean value: 0.5448999906190637
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.5 0.5 0.16666667 0.33333333 nan
|
|
nan 0.83333333 0.8 0.6 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.76923077 0.71153846 0.76923077 0.80769231 0.82692308 0.78846154
|
|
0.76923077 0.82692308 0.64150943 0.79245283]
|
|
|
|
mean value: 0.7703193033381712
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.4 0.66666667 0. 0.33333333 nan
|
|
nan 0.85714286 0.8 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.75 0.69387755 0.76 0.79166667 0.80851064 0.7755102
|
|
0.77777778 0.82352941 0.62745098 0.7755102 ]
|
|
|
|
mean value: 0.7583833434082853
|
|
|
|
key: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
test_precision
|
|
value: [0.5 0.5 0.5 0. 0.33333333 nan
|
|
nan 0.75 0.66666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.81818182 0.73913043 0.79166667 0.86363636 0.9047619 0.82608696
|
|
0.75 0.84 0.66666667 0.82608696]
|
|
|
|
mean value: 0.8026217767739506
|
|
|
|
key: test_recall
|
|
value: [0.33333333 0.33333333 1. 0. 0.33333333 nan
|
|
nan 1. 1. 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.69230769 0.65384615 0.73076923 0.73076923 0.73076923 0.73076923
|
|
0.80769231 0.80769231 0.59259259 0.73076923]
|
|
|
|
mean value: 0.7207977207977208
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.5 0.5 0.16666667 0.33333333 nan
|
|
nan 0.83333333 0.83333333 0.58333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.76923077 0.71153846 0.76923077 0.80769231 0.82692308 0.78846154
|
|
0.76923077 0.82692308 0.64245014 0.79131054]
|
|
|
|
mean value: 0.7702991452991453
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.25 0.5 0. 0.2 nan
|
|
nan 0.75 0.66666667 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.6 0.53125 0.61290323 0.65517241 0.67857143 0.63333333
|
|
0.63636364 0.7 0.45714286 0.63333333]
|
|
|
|
mean value: 0.6138070228344144
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00890541 0.00955153 0.00889421 0.00859499 0.00959396 0.00865912
|
|
0.01014996 0.00967169 0.01003861 0.01146197]
|
|
|
|
mean value: 0.00955214500427246
|
|
|
|
key: score_time
|
|
value: [0.00898552 0.00915551 0.00873256 0.00884151 0.00935507 0.00436473
|
|
0.00465631 0.0094142 0.00948119 0.01040006]
|
|
|
|
mean value: 0.008338665962219239
|
|
|
|
key: test_mcc
|
|
value: [-0.4472136 -0.33333333 0. -0.70710678 0. nan
|
|
nan 1. 1. 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.89056356 0.84866842 0.92307692 0.89056356 0.96225045 0.9258201
|
|
0.84866842 0.96225045 0.92724773 0.88730475]
|
|
|
|
mean value: 0.9066414371100951
|
|
|
|
key: test_accuracy
|
|
value: [0.33333333 0.33333333 0.5 0.16666667 0.5 nan
|
|
nan 1. 1. 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.94230769 0.92307692 0.96153846 0.94230769 0.98076923 0.96153846
|
|
0.92307692 0.98076923 0.96226415 0.94339623]
|
|
|
|
mean value: 0.9521044992743106
|
|
|
|
key: test_fscore
|
|
value: [0. 0.33333333 0.66666667 0.28571429 0.4 nan
|
|
nan 1. 1. 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.93877551 0.92 0.96153846 0.93877551 0.98113208 0.96
|
|
0.92592593 0.98039216 0.96153846 0.94117647]
|
|
|
|
mean value: 0.9509254572333691
|
|
|
|
key: test_precision
|
|
value: [0. 0.33333333 0.5 0.25 0.5 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 0.95833333 0.96153846 1. 0.96296296 1.
|
|
0.89285714 1. 1. 0.96 ]
|
|
|
|
mean value: 0.9735691900691901
|
|
|
|
key: test_recall
|
|
value: [0. 0.33333333 1. 0.33333333 0.33333333 nan
|
|
nan 1. 1. 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.88461538 0.88461538 0.96153846 0.88461538 1. 0.92307692
|
|
0.96153846 0.96153846 0.92592593 0.92307692]
|
|
|
|
mean value: 0.931054131054131
|
|
|
|
key: test_roc_auc
|
|
value: [0.33333333 0.33333333 0.5 0.16666667 0.5 nan
|
|
nan 1. 1. 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.94230769 0.92307692 0.96153846 0.94230769 0.98076923 0.96153846
|
|
0.92307692 0.98076923 0.96296296 0.94301994]
|
|
|
|
mean value: 0.9521367521367522
|
|
|
|
key: test_jcc
|
|
value: [0. 0.2 0.5 0.16666667 0.25 nan
|
|
nan 1. 1. 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.88461538 0.85185185 0.92592593 0.88461538 0.96296296 0.92307692
|
|
0.86206897 0.96153846 0.92592593 0.88888889]
|
|
|
|
mean value: 0.9071470674918951
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.57
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.27838421 0.40681887 0.27178621 0.42920971 0.31241632 0.35591602
|
|
0.32682848 0.27644706 0.28436017 0.2995646 ]
|
|
|
|
mean value: 0.3241731643676758
|
|
|
|
key: score_time
|
|
value: [0.01218176 0.01219702 0.01212978 0.01238561 0.01210904 0.00661492
|
|
0.00656652 0.01193714 0.01193643 0.01183224]
|
|
|
|
mean value: 0.010989046096801758
|
|
|
|
key: test_mcc
|
|
value: [0.33333333 0.70710678 0. 0. 0. nan
|
|
nan 0.70710678 1. 0.61237244]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.83333333 0.5 0.5 0.5 nan
|
|
nan 0.83333333 1. 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.85714286 0.66666667 0.57142857 0.4 nan
|
|
nan 0.85714286 1. 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.75 0.5 0.5 0.5 nan
|
|
nan 0.75 1. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 1. 1. 0.66666667 0.33333333 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.83333333 0.5 0.5 0.5 nan
|
|
nan 0.83333333 1. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.75 0.5 0.4 0.25 nan nan 0.75 1. 0.75]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.03
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01573658 0.01213527 0.00921369 0.00900602 0.00878453 0.00891757
|
|
0.0091784 0.00918722 0.00907421 0.00912213]
|
|
|
|
mean value: 0.010035562515258788
|
|
|
|
key: score_time
|
|
value: [0.01291966 0.00884986 0.00869918 0.00836587 0.00835061 0.0041151
|
|
0.00418639 0.00848269 0.00831079 0.00835586]
|
|
|
|
mean value: 0.008063602447509765
|
|
|
|
key: test_mcc
|
|
value: [0.70710678 0.70710678 0.33333333 1. 0.70710678 nan
|
|
nan 0.70710678 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.83333333 0.66666667 1. 0.83333333 nan
|
|
nan 0.83333333 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.85714286 0.66666667 1. 0.8 nan
|
|
nan 0.8 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.75 0.66666667 1. 1. nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 1. 0.66666667 1. 0.66666667 nan
|
|
nan 0.66666667 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.83333333 0.66666667 1. 0.83333333 nan
|
|
nan 0.83333333 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.75 0.5 1. 0.66666667 nan
|
|
nan 0.66666667 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.75
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.07804847 0.07725072 0.07686806 0.07656288 0.07687521 0.07669759
|
|
0.07711124 0.0770638 0.07734013 0.07700109]
|
|
|
|
mean value: 0.07708191871643066
|
|
|
|
key: score_time
|
|
value: [0.01668477 0.01657367 0.01659799 0.01660562 0.01659155 0.00435138
|
|
0.0042994 0.01672506 0.01664042 0.01662922]
|
|
|
|
mean value: 0.014169907569885254
|
|
|
|
key: test_mcc
|
|
value: [0. 0. 0.4472136 0. 0. nan
|
|
nan 1. 0.61237244 0.61237244]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.5 0.66666667 0.5 0.5 nan
|
|
nan 1. 0.8 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.57142857 0.75 0.4 0.4 nan
|
|
nan 1. 0.66666667 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5 0.5 0.6 0.5 0.5 nan nan 1. 1. 0.75]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 0.66666667 1. 0.33333333 0.33333333 nan
|
|
nan 1. 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.5 0.66666667 0.5 0.5 nan
|
|
nan 1. 0.75 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.4 0.6 0.25 0.25 nan nan 1. 0.5 0.75]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42) /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00831699 0.00808382 0.0081141 0.00813127 0.00810838 0.00812197
|
|
0.00812531 0.00818348 0.00840473 0.00810528]
|
|
|
|
mean value: 0.00816953182220459
|
|
|
|
key: score_time
|
|
value: [0.00827551 0.00835371 0.00831056 0.00835538 0.00824785 0.00409269
|
|
0.00411844 0.00829554 0.00831127 0.00829291]
|
|
|
|
mean value: 0.007465386390686035
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0. -0.33333333 -0.33333333 -0.33333333 nan
|
|
nan -0.33333333 0.16666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.5 0.33333333 0.33333333 0.33333333 nan
|
|
nan 0.33333333 0.6 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 0.4 0.33333333 0.33333333 0.33333333 nan
|
|
nan 0.33333333 0.5 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5 0.5 0.33333333 0.33333333 0.33333333 nan
|
|
nan 0.33333333 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.33333333 0.33333333 0.33333333 0.33333333 nan
|
|
nan 0.33333333 0.5 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.5 0.33333333 0.33333333 0.33333333 nan
|
|
nan 0.33333333 0.58333333 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.4 0.25 0.2 0.2 0.2 nan
|
|
nan 0.2 0.33333333 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.0098381 0.95413804 0.96767163 0.95117521 0.95051098 0.95307279
|
|
0.95376849 0.95404077 0.95342731 0.95893645]
|
|
|
|
mean value: 0.9606579780578614
|
|
|
|
key: score_time
|
|
value: [0.08670974 0.08692455 0.08682108 0.08670044 0.08678889 0.00440526
|
|
0.00438118 0.0927279 0.08725643 0.08669782]
|
|
|
|
mean value: 0.07094132900238037
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0.4472136 0.4472136 -0.33333333 0. nan
|
|
nan 1. 0.61237244 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.66666667 0.66666667 0.33333333 0.5 nan
|
|
nan 1. 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.75 0.75 0.33333333 0.4 nan
|
|
nan 1. 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5 0.6 0.6 0.33333333 0.5 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 1. 1. 0.33333333 0.33333333 nan
|
|
nan 1. 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.66666667 0.66666667 0.33333333 0.5 nan
|
|
nan 1. 0.75 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.6 0.6 0.2 0.25 nan nan 1. 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.43
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.77283669 0.89461184 0.82807112 0.81345439 0.84339142 0.90077519
|
|
0.80800915 0.86690879 0.86262417 0.93805766]
|
|
|
|
mean value: 0.8528740406036377
|
|
|
|
key: score_time
|
|
value: [0.18392515 0.20116615 0.21894264 0.19048834 0.13761568 0.00455546
|
|
0.0051651 0.17677927 0.19888759 0.21970034]
|
|
|
|
mean value: 0.15372257232666015
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0.4472136 0.70710678 -0.33333333 0. nan
|
|
nan 1. 0.61237244 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 0.96225045 1. 0.96225045 1. 0.96225045
|
|
0.9258201 1. 1. 0.96296296]
|
|
|
|
mean value: 0.9775534408683644
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.66666667 0.83333333 0.33333333 0.5 nan
|
|
nan 1. 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.98076923 1. 0.98076923 1. 0.98076923
|
|
0.96153846 1. 1. 0.98113208]
|
|
|
|
mean value: 0.9884978229317852
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.75 0.85714286 0.33333333 0.4 nan
|
|
nan 1. 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 0.98113208 1. 0.98039216 1. 0.98113208
|
|
0.96296296 1. 1. 0.98113208]
|
|
|
|
mean value: 0.9886751346240803
|
|
|
|
key: test_precision
|
|
value: [0.5 0.6 0.75 0.33333333 0.5 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 0.96296296 1. 1. 1. 0.96296296
|
|
0.92857143 1. 1. 0.96296296]
|
|
|
|
mean value: 0.9817460317460317
|
|
|
|
key: test_recall
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[0.33333333 1. 1. 0.33333333 0.33333333 nan
|
|
nan 1. 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 0.96153846 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9961538461538462
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.66666667 0.83333333 0.33333333 0.5 nan
|
|
nan 1. 0.75 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.98076923 1. 0.98076923 1. 0.98076923
|
|
0.96153846 1. 1. 0.98148148]
|
|
|
|
mean value: 0.9885327635327635
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.6 0.75 0.2 0.25 nan nan 1. 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 0.96296296 1. 0.96153846 1. 0.96296296
|
|
0.92857143 1. 1. 0.96296296]
|
|
|
|
mean value: 0.9778998778998779
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.020298 0.00831819 0.00845861 0.0083611 0.00830889 0.00845575
|
|
0.00973797 0.00877762 0.00922585 0.00842929]
|
|
|
|
mean value: 0.009837126731872559
|
|
|
|
key: score_time
|
|
value: [0.01680541 0.00865436 0.00859785 0.00842381 0.00902247 0.00420737
|
|
0.00420499 0.00842237 0.00900936 0.00894046]
|
|
|
|
mean value: 0.00862884521484375
|
|
|
|
key: test_mcc
|
|
value: [ 0.4472136 -0.33333333 0. -0.33333333 0. nan
|
|
nan -0.33333333 0.66666667 0.16666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.77151675 0.63245553 0.84615385 0.82305489 0.80829038 0.77849894
|
|
0.65433031 0.74466871 0.74106548 0.73609205]
|
|
|
|
mean value: 0.7536126885007749
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.33333333 0.5 0.33333333 0.5 nan
|
|
nan 0.33333333 0.8 0.6 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.88461538 0.80769231 0.92307692 0.90384615 0.90384615 0.88461538
|
|
0.82692308 0.86538462 0.86792453 0.86792453]
|
|
|
|
mean value: 0.8735849056603774
|
|
|
|
key: test_fscore
|
|
value: [0.5 0.33333333 0.66666667 0.33333333 0.4 nan
|
|
nan 0.33333333 0.8 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.88 0.7826087 0.92307692 0.89361702 0.90196078 0.875
|
|
0.83018868 0.85106383 0.8627451 0.8627451 ]
|
|
|
|
mean value: 0.8663006129430366
|
|
|
|
key: test_precision
|
|
value: [1. 0.33333333 0.5 0.33333333 0.5 nan
|
|
nan 0.33333333 0.66666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.91666667 0.9 0.92307692 1. 0.92 0.95454545
|
|
0.81481481 0.95238095 0.91666667 0.88 ]
|
|
|
|
mean value: 0.9178151478151478
|
|
|
|
key: test_recall
|
|
value: [0.33333333 0.33333333 1. 0.33333333 0.33333333 nan
|
|
nan 0.33333333 1. 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.84615385 0.69230769 0.92307692 0.80769231 0.88461538 0.80769231
|
|
0.84615385 0.76923077 0.81481481 0.84615385]
|
|
|
|
mean value: 0.8237891737891738
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.33333333 0.5 0.33333333 0.5 nan
|
|
nan 0.33333333 0.83333333 0.58333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.88461538 0.80769231 0.92307692 0.90384615 0.90384615 0.88461538
|
|
0.82692308 0.86538462 0.86894587 0.86752137]
|
|
|
|
mean value: 0.8736467236467237
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 0.2 0.5 0.2 0.25 nan
|
|
nan 0.2 0.66666667 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.78571429 0.64285714 0.85714286 0.80769231 0.82142857 0.77777778
|
|
0.70967742 0.74074074 0.75862069 0.75862069]
|
|
|
|
mean value: 0.7660272482018867
|
|
|
|
MCC on Blind test: -0.03
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.2208004 0.03246665 0.02982497 0.03053617 0.03095222 0.05028415
|
|
0.04468656 0.03867793 0.03164172 0.07298398]
|
|
|
|
mean value: 0.05828547477722168
|
|
|
|
key: score_time
|
|
value: [0.01097751 0.01042724 0.01009774 0.01005578 0.01014471 0.0048039
|
|
0.00504088 0.01020074 0.00997043 0.0110755 ]
|
|
|
|
mean value: 0.009279441833496094
|
|
|
|
key: test_mcc
|
|
value: [1. 1. 0.33333333 0.70710678 0.70710678 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 1. 0.66666667 0.83333333 0.83333333 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 1. 0.66666667 0.8 0.85714286 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.66666667 1. 0.75 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.66666667 0.66666667 1. nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 1. 0.66666667 0.83333333 0.83333333 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 1. 0.5 0.66666667 0.75 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.89
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02003193 0.03504539 0.02579474 0.01418924 0.01421022 0.01418233
|
|
0.0250864 0.03580475 0.03477359 0.03485084]
|
|
|
|
mean value: 0.02539694309234619
|
|
|
|
key: score_time
|
|
value: [0.02190375 0.0215292 0.01140451 0.01143599 0.01142645 0.00573635
|
|
0.00591302 0.02012944 0.02000165 0.02008748]
|
|
|
|
mean value: 0.014956784248352051
|
|
|
|
key: test_mcc
|
|
value: [0.4472136 0.70710678 0.33333333 0. 0. nan
|
|
nan 0.70710678 0.40824829 0.61237244]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 0.92307692 0.96225045 1. 1. 0.96225045
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9847577820375676
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.83333333 0.66666667 0.5 0.5 nan
|
|
nan 0.83333333 0.6 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.96153846 0.98076923 1. 1. 0.98076923
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9923076923076923
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.85714286 0.66666667 0.4 0.4 nan
|
|
nan 0.85714286 0.66666667 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 0.96153846 0.98113208 1. 1. 0.98113208
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9923802612481858
|
|
|
|
key: test_precision
|
|
value: [0.6 0.75 0.66666667 0.5 0.5 nan
|
|
nan 0.75 0.5 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 0.96153846 0.96296296 1. 1. 0.96296296
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9887464387464387
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.66666667 0.33333333 0.33333333 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 0.96153846 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9961538461538462
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.83333333 0.66666667 0.5 0.5 nan
|
|
nan 0.83333333 0.66666667 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.96153846 0.98076923 1. 1. 0.98076923
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9923076923076923
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.75 0.5 0.25 0.25 nan nan 0.75 0.5 0.75]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 0.92592593 0.96296296 1. 1. 0.96296296
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9851851851851852
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02043128 0.0085032 0.0083077 0.008286 0.0082469 0.00821781
|
|
0.00827694 0.00811672 0.0081346 0.00810599]
|
|
|
|
mean value: 0.009462714195251465
|
|
|
|
key: score_time
|
|
value: [0.00872779 0.00860095 0.0085876 0.0083971 0.00843334 0.00414538
|
|
0.00438237 0.00828457 0.00826526 0.00826359]
|
|
|
|
mean value: 0.007608795166015625
|
|
|
|
key: test_mcc
|
|
value: [-0.4472136 -0.4472136 0. -0.70710678 -0.33333333 nan
|
|
nan 1. 0.66666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.6172134 0.58080232 0.73131034 0.57735027 0.6172134 0.65433031
|
|
0.54006172 0.69230769 0.58487934 0.50927299]
|
|
|
|
mean value: 0.6104741777466153
|
|
|
|
key: test_accuracy
|
|
value: [0.33333333 0.33333333 0.5 0.16666667 0.33333333 nan
|
|
nan 1. 0.8 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.80769231 0.78846154 0.86538462 0.78846154 0.80769231 0.82692308
|
|
0.76923077 0.84615385 0.79245283 0.75471698]
|
|
|
|
mean value: 0.8047169811320755
|
|
|
|
key: test_fscore
|
|
value: [0. 0.5 0.66666667 0.28571429 0.33333333 nan
|
|
nan 1. 0.8 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.8 0.7755102 0.86792453 0.78431373 0.8 0.82352941
|
|
0.77777778 0.84615385 0.8 0.74509804]
|
|
|
|
mean value: 0.8020307532785732
|
|
|
|
key: test_precision
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[0. 0.4 0.5 0.25 0.33333333 nan
|
|
nan 1. 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.83333333 0.82608696 0.85185185 0.8 0.83333333 0.84
|
|
0.75 0.84615385 0.78571429 0.76 ]
|
|
|
|
mean value: 0.812647360690839
|
|
|
|
key: test_recall
|
|
value: [0. 0.66666667 1. 0.33333333 0.33333333 nan
|
|
nan 1. 1. 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.76923077 0.73076923 0.88461538 0.76923077 0.76923077 0.80769231
|
|
0.80769231 0.84615385 0.81481481 0.73076923]
|
|
|
|
mean value: 0.793019943019943
|
|
|
|
key: test_roc_auc
|
|
value: [0.33333333 0.33333333 0.5 0.16666667 0.33333333 nan
|
|
nan 1. 0.83333333 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.80769231 0.78846154 0.86538462 0.78846154 0.80769231 0.82692308
|
|
0.76923077 0.84615385 0.79202279 0.7542735 ]
|
|
|
|
mean value: 0.8046296296296296
|
|
|
|
key: test_jcc
|
|
value: [0. 0.33333333 0.5 0.16666667 0.2 nan
|
|
nan 1. 0.66666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.66666667 0.63333333 0.76666667 0.64516129 0.66666667 0.7
|
|
0.63636364 0.73333333 0.66666667 0.59375 ]
|
|
|
|
mean value: 0.6708608260019551
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00999832 0.01279449 0.01210523 0.01255631 0.01224685 0.01186943
|
|
0.0117228 0.01643276 0.01289439 0.01178789]
|
|
|
|
mean value: 0.012440848350524902
|
|
|
|
key: score_time
|
|
value: [0.0086329 0.01154256 0.01120043 0.01119494 0.01112103 0.00591636
|
|
0.00598788 0.01161695 0.01126814 0.01119208]
|
|
|
|
mean value: 0.009967327117919922
|
|
|
|
key: test_mcc
|
|
value: [ 0.33333333 0.33333333 0. -0.33333333 0. nan
|
|
nan 1. 1. 0.61237244]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 0.82305489 0.9258201 0.96225045 1. 0.9258201
|
|
0.72760688 0.9258201 0.96291111 0.85922733]
|
|
|
|
mean value: 0.9112510953164517
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.66666667 0.5 0.33333333 0.5 nan
|
|
nan 1. 1. 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.90384615 0.96153846 0.98076923 1. 0.96153846
|
|
0.84615385 0.96153846 0.98113208 0.9245283 ]
|
|
|
|
mean value: 0.9521044992743106
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.66666667 0.66666667 0.33333333 0.4 nan
|
|
nan 1. 1. 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 0.89361702 0.96296296 0.98039216 1. 0.96
|
|
0.86666667 0.96296296 0.98181818 0.92857143]
|
|
|
|
mean value: 0.9536991381121543
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.66666667 0.5 0.33333333 0.5 nan
|
|
nan 1. 1. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.92857143 1. 1. 1.
|
|
0.76470588 0.92857143 0.96428571 0.86666667]
|
|
|
|
mean value: 0.945280112044818
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.66666667 1. 0.33333333 0.33333333 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 0.80769231 1. 0.96153846 1. 0.92307692
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9692307692307692
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.66666667 0.5 0.33333333 0.5 nan
|
|
nan 1. 1. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.90384615 0.96153846 0.98076923 1. 0.96153846
|
|
0.84615385 0.96153846 0.98076923 0.92592593]
|
|
|
|
mean value: 0.9522079772079772
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.5 0.5 0.2 0.25 nan nan 1. 1. 0.75]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 0.80769231 0.92857143 0.96153846 1. 0.92307692
|
|
0.76470588 0.92857143 0.96428571 0.86666667]
|
|
|
|
mean value: 0.9145108812755872
|
|
|
|
MCC on Blind test: 0.01
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01173449 0.0120728 0.01188064 0.01165843 0.01149154 0.01192474
|
|
0.01300406 0.01200342 0.01213932 0.01185942]
|
|
|
|
mean value: 0.011976885795593261
|
|
|
|
key: score_time
|
|
value: [0.0111804 0.01164484 0.01130462 0.0112021 0.01118112 0.0060792
|
|
0.00607777 0.01164412 0.01155257 0.01169229]
|
|
|
|
mean value: 0.010355901718139649
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0.4472136 0. -0.33333333 0. nan
|
|
nan 1. 1. 0.61237244]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.82305489 0.71151247 1. 0.69693205 1. 0.82305489
|
|
1. 0.85634884 1. 0.92724773]
|
|
|
|
mean value: 0.8838150881832578
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.66666667 0.5 0.33333333 0.5 nan
|
|
nan 1. 1. 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.90384615 0.84615385 1. 0.82692308 1. 0.90384615
|
|
1. 0.92307692 1. 0.96226415]
|
|
|
|
mean value: 0.936611030478955
|
|
|
|
key: test_fscore
|
|
value: [0. 0.5 0.66666667 0.33333333 0.4 nan
|
|
nan 1. 1. 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.89361702 0.82608696 1. 0.79069767 1. 0.89361702
|
|
1. 0.92857143 1. 0.96296296]
|
|
|
|
mean value: 0.9295553065027927
|
|
|
|
key: test_precision
|
|
value: [0. 1. 0.5 0.33333333 0.5 nan
|
|
nan 1. 1. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 0.95 1. 1. 1. 1.
|
|
1. 0.86666667 1. 0.92857143]
|
|
|
|
mean value: 0.9745238095238096
|
|
|
|
key: test_recall
|
|
value: [0. 0.33333333 1. 0.33333333 0.33333333 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.80769231 0.73076923 1. 0.65384615 1. 0.80769231
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.66666667 0.5 0.33333333 0.5 nan
|
|
nan 1. 1. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.90384615 0.84615385 1. 0.82692308 1. 0.90384615
|
|
1. 0.92307692 1. 0.96296296]
|
|
|
|
mean value: 0.9366809116809117
|
|
|
|
key: test_jcc
|
|
value: [0. 0.33333333 0.5 0.2 0.25 nan
|
|
nan 1. 1. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.80769231 0.7037037 1. 0.65384615 1. 0.80769231
|
|
1. 0.86666667 1. 0.92857143]
|
|
|
|
mean value: 0.8768172568172569
|
|
|
|
MCC on Blind test: 0.18
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08145452 0.07150626 0.07048178 0.07051468 0.07052875 0.07111621
|
|
0.07133055 0.07053328 0.07560372 0.0774231 ]
|
|
|
|
mean value: 0.07304928302764893
|
|
|
|
key: score_time
|
|
value: [0.01477504 0.01447749 0.01440597 0.01421499 0.01418257 0.00448847
|
|
0.00439095 0.01417637 0.0155952 0.01548576]
|
|
|
|
mean value: 0.012619280815124511
|
|
|
|
key: test_mcc
|
|
value: [0.4472136 1. 0.33333333 0.70710678 0.33333333 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 1. 0.66666667 0.83333333 0.66666667 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.5 1. 0.66666667 0.8 0.66666667 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.66666667 1. 0.66666667 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 1. 0.66666667 0.66666667 0.66666667 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 1. 0.66666667 0.83333333 0.66666667 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 1. 0.5 0.66666667 0.5 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.95
|
|
|
|
Accuracy on Blind test: 0.98
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03153086 0.02778888 0.03016424 0.03284216 0.04811215 0.02667236
|
|
0.03099704 0.03483891 0.03757524 0.02810836]
|
|
|
|
mean value: 0.03286302089691162
|
|
|
|
key: score_time
|
|
value: [0.02119184 0.0220542 0.02267241 0.03023434 0.02344704 0.00497055
|
|
0.01071739 0.03668547 0.02092147 0.01624227]
|
|
|
|
mean value: 0.0209136962890625
|
|
|
|
key: test_mcc
|
|
value: [0.70710678 0.70710678 0.33333333 0.70710678 0.33333333 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.96225045 1. 1. 1. 1. 1.
|
|
0.96225045 1. 0.96296296 1. ]
|
|
|
|
mean value: 0.9887463860261716
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.83333333 0.66666667 0.83333333 0.66666667 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.98076923 1. 1. 1. 1. 1.
|
|
0.98076923 1. 0.98113208 1. ]
|
|
|
|
mean value: 0.994267053701016
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.85714286 0.66666667 0.8 0.66666667 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.98039216 1. 1. 1. 1. 1.
|
|
0.98039216 1. 0.98113208 1. ]
|
|
|
|
mean value: 0.9941916389197188
|
|
|
|
key: test_precision
|
|
value: [1. 0.75 0.66666667 1. 0.66666667 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 1. 0.66666667 0.66666667 0.66666667 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.96153846 1. 1. 1. 1. 1.
|
|
0.96153846 1. 0.96296296 1. ]
|
|
|
|
mean value: 0.9886039886039886
|
|
|
|
key: test_roc_auc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[0.83333333 0.83333333 0.66666667 0.83333333 0.66666667 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.98076923 1. 1. 1. 1. 1.
|
|
0.98076923 1. 0.98148148 1. ]
|
|
|
|
mean value: 0.9943019943019943
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.75 0.5 0.66666667 0.5 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.96153846 1. 1. 1. 1. 1.
|
|
0.96153846 1. 0.96296296 1. ]
|
|
|
|
mean value: 0.9886039886039886
|
|
|
|
MCC on Blind test: 0.89
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01145601 0.01071548 0.01049471 0.01030135 0.01034331 0.01053452
|
|
0.01219153 0.0107038 0.01105452 0.01032877]
|
|
|
|
mean value: 0.01081240177154541
|
|
|
|
key: score_time
|
|
value: [0.00883389 0.00875664 0.00957775 0.00913954 0.00889802 0.00447464
|
|
0.0049305 0.00890326 0.0089941 0.00859141]
|
|
|
|
mean value: 0.008109974861145019
|
|
|
|
key: test_mcc
|
|
value: [-0.33333333 0. 0. 0. 0. nan
|
|
nan -0.33333333 0.61237244 0.16666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.33333333 0.5 0.5 0.5 0.5 nan
|
|
nan 0.33333333 0.8 0.6 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.33333333 0.57142857 0.66666667 0.4 0.4 nan
|
|
nan 0.33333333 0.66666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.33333333 0.5 0.5 0.5 0.5 nan
|
|
nan 0.33333333 1. 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 0.66666667 1. 0.33333333 0.33333333 nan
|
|
nan 0.33333333 0.5 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.33333333 0.5 0.5 0.5 0.5 nan
|
|
nan 0.33333333 0.75 0.58333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.2 0.4 0.5 0.25 0.25 nan nan 0.2 0.5 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.11957264 0.14153457 0.10522366 0.10514927 0.10289383 0.12513161
|
|
0.12388921 0.12803459 0.12305784 0.10899067]
|
|
|
|
mean value: 0.11834778785705566
|
|
|
|
key: score_time
|
|
value: [0.00914145 0.00893044 0.00910687 0.00897527 0.00912714 0.00470376
|
|
0.004632 0.00898361 0.00916672 0.00901628]
|
|
|
|
mean value: 0.008178353309631348
|
|
|
|
key: test_mcc
|
|
value: [1. 1. 0.33333333 0.70710678 0.70710678 nan
|
|
nan 0.70710678 1. 0.61237244]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 1. 0.66666667 0.83333333 0.83333333 nan
|
|
nan 0.83333333 1. 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 1. 0.66666667 0.85714286 0.8 nan
|
|
nan 0.8 1. 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.66666667 0.75 1. nan
|
|
nan 1. 1. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.66666667 1. 0.66666667 nan
|
|
nan 0.66666667 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 1. 0.66666667 0.83333333 0.83333333 nan
|
|
nan 0.83333333 1. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 1. 0.5 0.75 0.66666667 nan
|
|
nan 0.66666667 1. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.63
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.00893831 0.00888896 0.00908279 0.00880885 0.00884104 0.00885582
|
|
0.00883555 0.00878549 0.00888848 0.01262331]
|
|
|
|
mean value: 0.009254860877990722
|
|
|
|
key: score_time
|
|
value: [0.00863051 0.00949979 0.00870037 0.00860357 0.00859833 0.00480151
|
|
0.00421071 0.00872087 0.00892401 0.01160169]
|
|
|
|
mean value: 0.00822913646697998
|
|
|
|
key: test_mcc
|
|
value: [ 0.33333333 0. -0.33333333 0.70710678 0. nan
|
|
nan 0. 0.40824829 -0.61237244]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.5 0.33333333 0.83333333 0.5 nan
|
|
nan 0.5 0.6 0.2 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.57142857 0.33333333 0.85714286 0.4 nan
|
|
nan 0. 0.66666667 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.5 0.33333333 0.75 0.5 nan
|
|
nan 0. 0.5 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.66666667 0.33333333 1. 0.33333333 nan
|
|
nan 0. 1. 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.5 0.33333333 0.83333333 0.5 nan
|
|
nan 0.5 0.66666667 0.25 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.4 0.2 0.75 0.25 nan nan 0. 0.5 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.14
|
|
|
|
Accuracy on Blind test: 0.48
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01256394 0.01242661 0.01233029 0.01234102 0.01236892 0.01238871
|
|
0.01237059 0.01262426 0.01242614 0.0123601 ]
|
|
|
|
mean value: 0.012420058250427246
|
|
|
|
key: score_time
|
|
value: [0.01140499 0.01143169 0.01140809 0.01140833 0.01138973 0.00616193
|
|
0.0060811 0.01142287 0.01136637 0.01145077]
|
|
|
|
mean value: 0.010352587699890137
|
|
|
|
key: test_mcc
|
|
value: [0.33333333 0.70710678 0.70710678 0. 0. nan
|
|
nan 1. 1. 0.61237244]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.83333333 0.83333333 0.5 0.5 nan
|
|
nan 1. 1. 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.85714286 0.85714286 0.57142857 0.4 nan
|
|
nan 1. 1. 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.75 0.75 0.5 0.5 nan
|
|
nan 1. 1. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 1. 1. 0.66666667 0.33333333 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.83333333 0.83333333 0.5 0.5 nan
|
|
nan 1. 1. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.75 0.75 0.4 0.25 nan nan 1. 1. 0.75]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:176: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:179: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.07766771 0.07815814 0.07832098 0.08276105 0.10114908 0.09942913
|
|
0.08842373 0.0795567 0.10013795 0.07987666]
|
|
|
|
mean value: 0.086548113822937
|
|
|
|
key: score_time
|
|
value: [0.01170754 0.01169586 0.01166344 0.01162314 0.01418304 0.00657368
|
|
0.00637412 0.01173091 0.01183128 0.01019073]
|
|
|
|
mean value: 0.010757374763488769
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0.70710678 0.70710678 -0.33333333 0. nan
|
|
nan 1. 1. 0.61237244]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.88527041 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9885270412757426
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.83333333 0.83333333 0.33333333 0.5 nan
|
|
nan 1. 1. 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.94230769 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9942307692307693
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.85714286 0.85714286 0.33333333 0.4 nan
|
|
nan 1. 1. 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.94117647 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9941176470588236
|
|
|
|
key: test_precision
|
|
value: [0.5 0.75 0.75 0.33333333 0.5 nan
|
|
nan 1. 1. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.96 1. 1. 1. 1. 1. 1. 1. 1. 1. ]
|
|
|
|
mean value: 0.996
|
|
|
|
key: test_recall
|
|
value: [0.33333333 1. 1. 0.33333333 0.33333333 nan
|
|
nan 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.92307692 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9923076923076923
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.83333333 0.83333333 0.33333333 0.5 nan
|
|
nan 1. 1. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.94230769 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9942307692307693
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.75 0.75 0.2 0.25 nan nan 1. 1. 0.75]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.88888889 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9888888888888889
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02820563 0.02576756 0.02641773 0.02557731 0.02693248 0.02358747
|
|
0.0255456 0.02283812 0.02356815 0.02982664]
|
|
|
|
mean value: 0.025826668739318846
|
|
|
|
key: score_time
|
|
value: [0.01189876 0.01176596 0.01171589 0.01177764 0.01182795 0.01168847
|
|
0.01162362 0.01171899 0.01163292 0.01207137]
|
|
|
|
mean value: 0.01177215576171875
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.81649658 0.5 0.21821789 0.81649658 0.21821789
|
|
0.6 0.81649658 0.81649658 1. ]
|
|
|
|
mean value: 0.6618918685110615
|
|
|
|
key: train_mcc
|
|
value: [0.91201231 0.93356387 0.93356387 0.91201231 0.88910845 0.91111111
|
|
0.88910845 0.88910845 0.95555556 0.88910845]
|
|
|
|
mean value: 0.9114252823784718
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.9 0.7 0.6 0.9 0.6 0.8 0.9 0.9 1. ]
|
|
|
|
mean value: 0.8200000000000001
|
|
|
|
key: train_accuracy
|
|
value: [0.95555556 0.96666667 0.96666667 0.95555556 0.94444444 0.95555556
|
|
0.94444444 0.94444444 0.97777778 0.94444444]
|
|
|
|
mean value: 0.9555555555555556
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.90909091 0.57142857 0.66666667 0.90909091 0.66666667
|
|
0.8 0.90909091 0.88888889 1. ]
|
|
|
|
mean value: 0.823001443001443
|
|
|
|
key: train_fscore
|
|
value: [0.95454545 0.96703297 0.96703297 0.95652174 0.94505495 0.95555556
|
|
0.94382022 0.94505495 0.97777778 0.94382022]
|
|
|
|
mean value: 0.955621680062325
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.83333333 1. 0.57142857 0.83333333 0.57142857
|
|
0.8 0.83333333 1. 1. ]
|
|
|
|
mean value: 0.8276190476190476
|
|
|
|
key: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
train_precision
|
|
value: [0.97674419 0.95652174 0.95652174 0.93617021 0.93478261 0.95555556
|
|
0.95454545 0.93478261 0.97777778 0.95454545]
|
|
|
|
mean value: 0.9537947336888886
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.4 0.8 1. 0.8 0.8 1. 0.8 1. ]
|
|
|
|
mean value: 0.86
|
|
|
|
key: train_recall
|
|
value: [0.93333333 0.97777778 0.97777778 0.97777778 0.95555556 0.95555556
|
|
0.93333333 0.95555556 0.97777778 0.93333333]
|
|
|
|
mean value: 0.9577777777777778
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.9 0.7 0.6 0.9 0.6 0.8 0.9 0.9 1. ]
|
|
|
|
mean value: 0.8200000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [0.95555556 0.96666667 0.96666667 0.95555556 0.94444444 0.95555556
|
|
0.94444444 0.94444444 0.97777778 0.94444444]
|
|
|
|
mean value: 0.9555555555555556
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.83333333 0.4 0.5 0.83333333 0.5
|
|
0.66666667 0.83333333 0.8 1. ]
|
|
|
|
mean value: 0.72
|
|
|
|
key: train_jcc
|
|
value: [0.91304348 0.93617021 0.93617021 0.91666667 0.89583333 0.91489362
|
|
0.89361702 0.89583333 0.95652174 0.89361702]
|
|
|
|
mean value: 0.9152366635831021
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.80905724 0.7472713 0.62414098 0.64533067 0.73280716 0.58336401
|
|
0.60610628 0.74971604 0.74895 0.61796379]
|
|
|
|
mean value: 0.6864707469940186
|
|
|
|
key: score_time
|
|
value: [0.01288104 0.01487541 0.01242852 0.01178455 0.01245666 0.01180792
|
|
0.01207376 0.01235223 0.01500702 0.01522446]
|
|
|
|
mean value: 0.013089156150817871
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 0.81649658 0.21821789 0.5 0.81649658 0.21821789
|
|
0.81649658 0.81649658 0.6 1. ]
|
|
|
|
mean value: 0.6457075774890866
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 0.95555556 1. 0.95555556 0.95555556
|
|
0.93356387 1. 1. 1. ]
|
|
|
|
mean value: 0.980023053806288
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.9 0.6 0.7 0.9 0.6 0.9 0.9 0.8 1. ]
|
|
|
|
mean value: 0.81
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 0.97777778 1. 0.97777778 0.97777778
|
|
0.96666667 1. 1. 1. ]
|
|
|
|
mean value: 0.99
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.90909091 0.5 0.76923077 0.90909091 0.66666667
|
|
0.88888889 0.90909091 0.8 1. ]
|
|
|
|
mean value: 0.8185392385392385
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 0.97777778 1. 0.97777778 0.97777778
|
|
0.96629213 1. 1. 1. ]
|
|
|
|
mean value: 0.9899625468164794
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.83333333 0.66666667 0.625 0.83333333 0.57142857
|
|
1. 0.83333333 0.8 1. ]
|
|
|
|
mean value: 0.7877380952380952
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.97777778 1. 0.97777778 0.97777778
|
|
0.97727273 1. 1. 1. ]
|
|
|
|
mean value: 0.9910606060606061
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.4 1. 1. 0.8 0.8 1. 0.8 1. ]
|
|
|
|
mean value: 0.88
|
|
|
|
key: train_recall
|
|
value: [1. 1. 0.97777778 1. 0.97777778 0.97777778
|
|
0.95555556 1. 1. 1. ]
|
|
|
|
mean value: 0.9888888888888889
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.9 0.6 0.7 0.9 0.6 0.9 0.9 0.8 1. ]
|
|
|
|
mean value: 0.81
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 0.97777778 1. 0.97777778 0.97777778
|
|
0.96666667 1. 1. 1. ]
|
|
|
|
mean value: 0.99
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.83333333 0.33333333 0.625 0.83333333 0.5
|
|
0.8 0.83333333 0.66666667 1. ]
|
|
|
|
mean value: 0.7139285714285715
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 0.95652174 1. 0.95652174 0.95652174
|
|
0.93478261 1. 1. 1. ]
|
|
|
|
mean value: 0.9804347826086957
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0124836 0.00930858 0.00983167 0.00957084 0.00956893 0.00951123
|
|
0.00926447 0.0093565 0.00924706 0.00938511]
|
|
|
|
mean value: 0.009752798080444335
|
|
|
|
key: score_time
|
|
value: [0.01172256 0.00908756 0.00973558 0.00952482 0.00956821 0.0095439
|
|
0.0094943 0.00932264 0.00934172 0.00925255]
|
|
|
|
mean value: 0.009659385681152344
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 0.5 0.21821789 0.21821789 0.6 0.
|
|
0.40824829 0.40824829 0.65465367 0.6 ]
|
|
|
|
mean value: 0.4262239702815665
|
|
|
|
key: train_mcc
|
|
value: [0.77777778 0.57642872 0.56980288 0.75724019 0.55708601 0.67488191
|
|
0.67488191 0.65025037 0.5500191 0.65487619]
|
|
|
|
mean value: 0.6443245048315153
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.7 0.6 0.6 0.8 0.5 0.7 0.7 0.8 0.8]
|
|
|
|
mean value: 0.7
|
|
|
|
key: train_accuracy
|
|
value: [0.88888889 0.77777778 0.77777778 0.87777778 0.76666667 0.83333333
|
|
0.83333333 0.82222222 0.76666667 0.82222222]
|
|
|
|
mean value: 0.8166666666666667
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.76923077 0.5 0.66666667 0.8 0.66666667
|
|
0.72727273 0.72727273 0.83333333 0.8 ]
|
|
|
|
mean value: 0.7323776223776224
|
|
|
|
key: train_fscore
|
|
value: [0.88888889 0.80392157 0.8 0.88172043 0.7961165 0.84536082
|
|
0.84536082 0.83333333 0.79207921 0.83673469]
|
|
|
|
mean value: 0.8323516277094448
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.625 0.66666667 0.57142857 0.8 0.5
|
|
0.66666667 0.66666667 0.71428571 0.8 ]
|
|
|
|
mean value: 0.6725
|
|
|
|
key: train_precision
|
|
value: [0.88888889 0.71929825 0.72727273 0.85416667 0.70689655 0.78846154
|
|
0.78846154 0.78431373 0.71428571 0.77358491]
|
|
|
|
mean value: 0.774563050252582
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.4 0.8 0.8 1. 0.8 0.8 1. 0.8]
|
|
|
|
mean value: 0.8400000000000001
|
|
|
|
key: train_recall
|
|
value: [0.88888889 0.91111111 0.88888889 0.91111111 0.91111111 0.91111111
|
|
0.91111111 0.88888889 0.88888889 0.91111111]
|
|
|
|
mean value: 0.9022222222222221
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.7 0.6 0.6 0.8 0.5 0.7 0.7 0.8 0.8]
|
|
|
|
mean value: 0.7000000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [0.88888889 0.77777778 0.77777778 0.87777778 0.76666667 0.83333333
|
|
0.83333333 0.82222222 0.76666667 0.82222222]
|
|
|
|
mean value: 0.8166666666666667
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.625 0.33333333 0.5 0.66666667 0.5
|
|
0.57142857 0.57142857 0.71428571 0.66666667]
|
|
|
|
mean value: 0.5863095238095238
|
|
|
|
key: train_jcc
|
|
value: [0.8 0.67213115 0.66666667 0.78846154 0.66129032 0.73214286
|
|
0.73214286 0.71428571 0.6557377 0.71929825]
|
|
|
|
mean value: 0.714215705435333
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01002598 0.00972629 0.00943542 0.009835 0.00988364 0.00961804
|
|
0.00996184 0.00956464 0.00955367 0.00944734]
|
|
|
|
mean value: 0.009705185890197754
|
|
|
|
key: score_time
|
|
value: [0.0102427 0.00930071 0.00941706 0.00960851 0.00960922 0.00931358
|
|
0.00965929 0.00924778 0.00924873 0.00928211]
|
|
|
|
mean value: 0.009492969512939453
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.40824829 0. 0.40824829 0.40824829 0.6
|
|
0.40824829 0.2 0.40824829 0. ]
|
|
|
|
mean value: 0.3657738033247041
|
|
|
|
key: train_mcc
|
|
value: [0.62609903 0.58137767 0.60540551 0.58137767 0.62360956 0.71269665
|
|
0.60238451 0.58969198 0.6681531 0.68957028]
|
|
|
|
mean value: 0.6280365978811131
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.7 0.5 0.7 0.7 0.8 0.7 0.6 0.7 0.5]
|
|
|
|
mean value: 0.6799999999999999
|
|
|
|
key: train_accuracy
|
|
value: [0.81111111 0.78888889 0.8 0.78888889 0.81111111 0.85555556
|
|
0.8 0.78888889 0.83333333 0.84444444]
|
|
|
|
mean value: 0.8122222222222222
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.66666667 0.28571429 0.72727273 0.66666667 0.8
|
|
0.72727273 0.6 0.66666667 0.44444444]
|
|
|
|
mean value: 0.6493795093795094
|
|
|
|
key: train_fscore
|
|
value: [0.8 0.77647059 0.78571429 0.77647059 0.8045977 0.85057471
|
|
0.79069767 0.7654321 0.82758621 0.84090909]
|
|
|
|
mean value: 0.8018452946967656
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.75 0.5 0.66666667 0.75 0.8
|
|
0.66666667 0.6 0.75 0.5 ]
|
|
|
|
mean value: 0.6816666666666666
|
|
|
|
key: train_precision
|
|
value: [0.85 0.825 0.84615385 0.825 0.83333333 0.88095238
|
|
0.82926829 0.86111111 0.85714286 0.86046512]
|
|
|
|
mean value: 0.8468426937655525
|
|
|
|
key: test_recall
|
|
value: [1. 0.6 0.2 0.8 0.6 0.8 0.8 0.6 0.6 0.4]
|
|
|
|
mean value: 0.64
|
|
|
|
key: train_recall
|
|
value: [0.75555556 0.73333333 0.73333333 0.73333333 0.77777778 0.82222222
|
|
0.75555556 0.68888889 0.8 0.82222222]
|
|
|
|
mean value: 0.7622222222222222
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.7 0.5 0.7 0.7 0.8 0.7 0.6 0.7 0.5]
|
|
|
|
mean value: 0.68
|
|
|
|
key: train_roc_auc
|
|
value: [0.81111111 0.78888889 0.8 0.78888889 0.81111111 0.85555556
|
|
0.8 0.78888889 0.83333333 0.84444444]
|
|
|
|
mean value: 0.8122222222222222
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.5 0.16666667 0.57142857 0.5 0.66666667
|
|
0.57142857 0.42857143 0.5 0.28571429]
|
|
|
|
mean value: 0.5023809523809524
|
|
|
|
key: train_jcc
|
|
value: [0.66666667 0.63461538 0.64705882 0.63461538 0.67307692 0.74
|
|
0.65384615 0.62 0.70588235 0.7254902 ]
|
|
|
|
mean value: 0.6701251885369532
|
|
|
|
MCC on Blind test: 0.18
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00937963 0.0080359 0.00806713 0.00866961 0.00906754 0.00908542
|
|
0.00891066 0.00935316 0.009197 0.00908399]
|
|
|
|
mean value: 0.008885002136230469
|
|
|
|
key: score_time
|
|
value: [0.00974846 0.0091846 0.00914931 0.00993633 0.0101018 0.00967264
|
|
0.01007986 0.01016092 0.01003218 0.00999641]
|
|
|
|
mean value: 0.009806251525878907
|
|
|
|
key: test_mcc
|
|
value: [ 0.81649658 0.2 0.21821789 -0.21821789 0. 0.40824829
|
|
0.40824829 0.6 -0.6 0.5 ]
|
|
|
|
mean value: 0.2332993161855452
|
|
|
|
key: train_mcc
|
|
value: [0.64700558 0.6 0.64700558 0.51111111 0.64508188 0.69162666
|
|
0.62360956 0.55555556 0.48900965 0.62237591]
|
|
|
|
mean value: 0.6032381499326708
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.6 0.6 0.4 0.5 0.7 0.7 0.8 0.2 0.7]
|
|
|
|
mean value: 0.61
|
|
|
|
key: train_accuracy
|
|
value: [0.82222222 0.8 0.82222222 0.75555556 0.82222222 0.84444444
|
|
0.81111111 0.77777778 0.74444444 0.81111111]
|
|
|
|
mean value: 0.8011111111111111
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.6 0.5 0.5 0.54545455 0.72727273
|
|
0.72727273 0.8 0.2 0.57142857]
|
|
|
|
mean value: 0.6080519480519481
|
|
|
|
key: train_fscore
|
|
value: [0.82978723 0.8 0.82978723 0.75555556 0.82608696 0.85106383
|
|
0.8045977 0.77777778 0.74157303 0.80898876]
|
|
|
|
mean value: 0.8025218086629647
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.6 0.66666667 0.42857143 0.5 0.66666667
|
|
0.66666667 0.8 0.2 1. ]
|
|
|
|
mean value: 0.6361904761904762
|
|
|
|
key: train_precision
|
|
value: [0.79591837 0.8 0.79591837 0.75555556 0.80851064 0.81632653
|
|
0.83333333 0.77777778 0.75 0.81818182]
|
|
|
|
mean value: 0.795152238845248
|
|
|
|
key: test_recall
|
|
value: [1. 0.6 0.4 0.6 0.6 0.8 0.8 0.8 0.2 0.4]
|
|
|
|
mean value: 0.62
|
|
|
|
key: train_recall
|
|
value: [0.86666667 0.8 0.86666667 0.75555556 0.84444444 0.88888889
|
|
0.77777778 0.77777778 0.73333333 0.8 ]
|
|
|
|
mean value: 0.8111111111111111
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.6 0.6 0.4 0.5 0.7 0.7 0.8 0.2 0.7]
|
|
|
|
mean value: 0.61
|
|
|
|
key: train_roc_auc
|
|
value: [0.82222222 0.8 0.82222222 0.75555556 0.82222222 0.84444444
|
|
0.81111111 0.77777778 0.74444444 0.81111111]
|
|
|
|
mean value: 0.8011111111111111
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.42857143 0.33333333 0.33333333 0.375 0.57142857
|
|
0.57142857 0.66666667 0.11111111 0.4 ]
|
|
|
|
mean value: 0.4624206349206349
|
|
|
|
key: train_jcc
|
|
value: [0.70909091 0.66666667 0.70909091 0.60714286 0.7037037 0.74074074
|
|
0.67307692 0.63636364 0.58928571 0.67924528]
|
|
|
|
mean value: 0.6714407343180928
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01032925 0.01031327 0.01048875 0.0095973 0.00918651 0.01025271
|
|
0.01038647 0.00908804 0.00948262 0.01041341]
|
|
|
|
mean value: 0.009953832626342774
|
|
|
|
key: score_time
|
|
value: [0.00930023 0.0093739 0.00958729 0.00855732 0.00871849 0.00915527
|
|
0.00932598 0.00866413 0.00870752 0.00956011]
|
|
|
|
mean value: 0.009095025062561036
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.81649658 0.21821789 0.6 0.40824829 0.40824829
|
|
0.40824829 0.6 0.6 0.65465367]
|
|
|
|
mean value: 0.553060959419101
|
|
|
|
key: train_mcc
|
|
value: [0.8675239 0.84632727 0.84465303 0.84465303 0.88910845 0.93356387
|
|
0.8230355 0.86666667 0.84465303 0.78086881]
|
|
|
|
mean value: 0.854105354455519
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.9 0.6 0.8 0.7 0.7 0.7 0.8 0.8 0.8]
|
|
|
|
mean value: 0.77
|
|
|
|
key: train_accuracy
|
|
value: [0.93333333 0.92222222 0.92222222 0.92222222 0.94444444 0.96666667
|
|
0.91111111 0.93333333 0.92222222 0.88888889]
|
|
|
|
mean value: 0.9266666666666666
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.90909091 0.5 0.8 0.66666667 0.72727273
|
|
0.72727273 0.8 0.8 0.75 ]
|
|
|
|
mean value: 0.7589393939393939
|
|
|
|
key: train_fscore
|
|
value: [0.93181818 0.91954023 0.92134831 0.92307692 0.94382022 0.96629213
|
|
0.90909091 0.93333333 0.92307692 0.88372093]
|
|
|
|
mean value: 0.925511810467119
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.83333333 0.66666667 0.8 0.75 0.66666667
|
|
0.66666667 0.8 0.8 1. ]
|
|
|
|
mean value: 0.7816666666666667
|
|
|
|
key: train_precision
|
|
value: [0.95348837 0.95238095 0.93181818 0.91304348 0.95454545 0.97727273
|
|
0.93023256 0.93333333 0.91304348 0.92682927]
|
|
|
|
mean value: 0.938598780439763
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.4 0.8 0.6 0.8 0.8 0.8 0.8 0.6]
|
|
|
|
mean value: 0.76
|
|
|
|
key: train_recall
|
|
value: [0.91111111 0.88888889 0.91111111 0.93333333 0.93333333 0.95555556
|
|
0.88888889 0.93333333 0.93333333 0.84444444]
|
|
|
|
mean value: 0.9133333333333333
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.9 0.6 0.8 0.7 0.7 0.7 0.8 0.8 0.8]
|
|
|
|
mean value: 0.77
|
|
|
|
key: train_roc_auc
|
|
value: [0.93333333 0.92222222 0.92222222 0.92222222 0.94444444 0.96666667
|
|
0.91111111 0.93333333 0.92222222 0.88888889]
|
|
|
|
mean value: 0.9266666666666666
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.83333333 0.33333333 0.66666667 0.5 0.57142857
|
|
0.57142857 0.66666667 0.66666667 0.6 ]
|
|
|
|
mean value: 0.6242857142857142
|
|
|
|
key: train_jcc
|
|
value: [0.87234043 0.85106383 0.85416667 0.85714286 0.89361702 0.93478261
|
|
0.83333333 0.875 0.85714286 0.79166667]
|
|
|
|
mean value: 0.8620256266243778
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.37708092 0.3637991 0.45402408 0.35582471 0.38116431 0.36729598
|
|
0.41565084 0.37348127 0.51866865 0.39506054]
|
|
|
|
mean value: 0.40020503997802737
|
|
|
|
key: score_time
|
|
value: [0.01199031 0.01207948 0.0120008 0.01200986 0.01195621 0.01205134
|
|
0.01203179 0.01206517 0.01206875 0.0120163 ]
|
|
|
|
mean value: 0.01202700138092041
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 0.81649658 0.40824829 0.40824829 0.6 0.65465367
|
|
0.81649658 0.81649658 0.40824829 1. ]
|
|
|
|
mean value: 0.6583541955590722
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.9 0.7 0.7 0.8 0.8 0.9 0.9 0.7 1. ]
|
|
|
|
mean value: 0.8200000000000001
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.90909091 0.66666667 0.72727273 0.8 0.83333333
|
|
0.88888889 0.90909091 0.72727273 1. ]
|
|
|
|
mean value: 0.8294949494949495
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.83333333 0.75 0.66666667 0.8 0.71428571
|
|
1. 0.83333333 0.66666667 1. ]
|
|
|
|
mean value: 0.7978571428571428
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.6 0.8 0.8 1. 0.8 1. 0.8 1. ]
|
|
|
|
mean value: 0.88
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.9 0.7 0.7 0.8 0.8 0.9 0.9 0.7 1. ]
|
|
|
|
mean value: 0.8200000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.83333333 0.5 0.57142857 0.66666667 0.71428571
|
|
0.8 0.83333333 0.57142857 1. ]
|
|
|
|
mean value: 0.7204761904761905
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01760101 0.01309419 0.01132631 0.01077938 0.010427 0.01067376
|
|
0.01073265 0.01048017 0.01065993 0.01080036]
|
|
|
|
mean value: 0.011657476425170898
|
|
|
|
key: score_time
|
|
value: [0.01169825 0.00983357 0.00959349 0.00917745 0.00919628 0.00912952
|
|
0.00917816 0.00921082 0.00917602 0.00917816]
|
|
|
|
mean value: 0.009537172317504884
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.65465367 1. 0.81649658 0.81649658 1.
|
|
0.40824829 0.81649658 0.65465367 1. ]
|
|
|
|
mean value: 0.7983541955590722
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.8 1. 0.9 0.9 1. 0.7 0.9 0.8 1. ]
|
|
|
|
mean value: 0.89
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.83333333 1. 0.90909091 0.88888889 1.
|
|
0.72727273 0.90909091 0.83333333 1. ]
|
|
|
|
mean value: 0.901010101010101
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.71428571 1. 0.83333333 1. 1.
|
|
0.66666667 0.83333333 0.71428571 1. ]
|
|
|
|
mean value: 0.8595238095238096
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 0.8 1. 0.8 1. 1. 1. ]
|
|
|
|
mean value: 0.96
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.8 1. 0.9 0.9 1. 0.7 0.9 0.8 1. ]
|
|
|
|
mean value: 0.89
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.71428571 1. 0.83333333 0.8 1.
|
|
0.57142857 0.83333333 0.71428571 1. ]
|
|
|
|
mean value: 0.8300000000000001
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.89
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08497882 0.08490729 0.08530784 0.08088517 0.07980752 0.08026528
|
|
0.08012891 0.07985377 0.0795064 0.07973003]
|
|
|
|
mean value: 0.08153710365295411
|
|
|
|
key: score_time
|
|
value: [0.01674938 0.01818061 0.0167861 0.01685762 0.01680422 0.01698256
|
|
0.01682544 0.01685238 0.01678157 0.01684117]
|
|
|
|
mean value: 0.01696610450744629
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 0.65465367 0.65465367 0.81649658 0.65465367 0.40824829
|
|
0.6 0.81649658 0.6 1. ]
|
|
|
|
mean value: 0.6859856135151223
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.8 0.8 0.9 0.8 0.7 0.8 0.9 0.8 1. ]
|
|
|
|
mean value: 0.8300000000000001
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.83333333 0.75 0.90909091 0.75 0.72727273
|
|
0.8 0.88888889 0.8 1. ]
|
|
|
|
mean value: 0.8291919191919191
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.71428571 1. 0.83333333 1. 0.66666667
|
|
0.8 1. 0.8 1. ]
|
|
|
|
mean value: 0.8528571428571429
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.6 1. 0.6 0.8 0.8 0.8 0.8 1. ]
|
|
|
|
mean value: 0.84
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.8 0.8 0.9 0.8 0.7 0.8 0.9 0.8 1. ]
|
|
|
|
mean value: 0.8300000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.71428571 0.6 0.83333333 0.6 0.57142857
|
|
0.66666667 0.8 0.66666667 1. ]
|
|
|
|
mean value: 0.7166666666666667
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.49
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00878644 0.00890398 0.0085156 0.00929928 0.00923872 0.00895572
|
|
0.00925231 0.00933337 0.00873685 0.00847316]
|
|
|
|
mean value: 0.008949542045593261
|
|
|
|
key: score_time
|
|
value: [0.00883341 0.00892162 0.00849938 0.00916171 0.00890326 0.00888228
|
|
0.00898838 0.00917315 0.00845098 0.00846076]
|
|
|
|
mean value: 0.008827495574951171
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.65465367 0.40824829 0.81649658 0.6 0.21821789
|
|
0.65465367 0.6 0.40824829 0. ]
|
|
|
|
mean value: 0.5177014974435125
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.8 0.7 0.9 0.8 0.6 0.8 0.8 0.7 0.5]
|
|
|
|
mean value: 0.75
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.83333333 0.72727273 0.90909091 0.8 0.66666667
|
|
0.83333333 0.8 0.72727273 0.54545455]
|
|
|
|
mean value: 0.7751515151515151
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.71428571 0.66666667 0.83333333 0.8 0.57142857
|
|
0.71428571 0.8 0.66666667 0.5 ]
|
|
|
|
mean value: 0.71
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.8 1. 0.8 0.8 1. 0.8 0.8 0.6]
|
|
|
|
mean value: 0.86
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.8 0.7 0.9 0.8 0.6 0.8 0.8 0.7 0.5]
|
|
|
|
mean value: 0.75
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.71428571 0.57142857 0.83333333 0.66666667 0.5
|
|
0.71428571 0.66666667 0.57142857 0.375 ]
|
|
|
|
mean value: 0.6446428571428572
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.03
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.02410698 1.02078891 1.02329874 1.02577353 1.02832651 1.0975976
|
|
1.09256458 1.01587534 1.0089438 1.00883484]
|
|
|
|
mean value: 1.0346110820770265
|
|
|
|
key: score_time
|
|
value: [0.09517384 0.09400702 0.09465933 0.09498215 0.09645772 0.0963223
|
|
0.09489012 0.09161878 0.09524989 0.09460068]
|
|
|
|
mean value: 0.09479618072509766
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.81649658 0.65465367 0.81649658 0.65465367 0.65465367
|
|
0.81649658 0.81649658 0.6 0.81649658]
|
|
|
|
mean value: 0.7462940497690288
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.9 0.8 0.9 0.8 0.8 0.9 0.9 0.8 0.9]
|
|
|
|
mean value: 0.86
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.90909091 0.75 0.90909091 0.75 0.83333333
|
|
0.88888889 0.88888889 0.8 0.88888889]
|
|
|
|
mean value: 0.8527272727272728
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.83333333 1. 0.83333333 1. 0.71428571
|
|
1. 1. 0.8 1. ]
|
|
|
|
mean value: 0.9014285714285715
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.6 1. 0.6 1. 0.8 0.8 0.8 0.8]
|
|
|
|
mean value: 0.84
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.9 0.8 0.9 0.8 0.8 0.9 0.9 0.8 0.9]
|
|
|
|
mean value: 0.86
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.83333333 0.6 0.83333333 0.6 0.71428571
|
|
0.8 0.8 0.66666667 0.8 ]
|
|
|
|
mean value: 0.7480952380952381
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.84236526 0.86939478 0.84446883 0.89900541 0.81594372 0.85574555
|
|
0.87060905 0.88659859 0.84814668 0.80270219]
|
|
|
|
mean value: 0.8534980058670044
|
|
|
|
key: score_time
|
|
value: [0.18092632 0.22178912 0.15738559 0.22907305 0.1698842 0.20201349
|
|
0.20945191 0.231884 0.2336936 0.21163177]
|
|
|
|
mean value: 0.20477330684661865
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 1. 0.65465367 0.81649658 0.81649658 0.65465367
|
|
0.40824829 0.81649658 0.6 0.81649658]
|
|
|
|
mean value: 0.7400038536518447
|
|
|
|
key: train_mcc
|
|
value: [0.97801929 0.97801929 0.95555556 0.97801929 0.97801929 0.95555556
|
|
0.95555556 0.95555556 0.93356387 0.95555556]
|
|
|
|
mean value: 0.9623418824548596
|
|
|
|
key: test_accuracy
|
|
value: [0.9 1. 0.8 0.9 0.9 0.8 0.7 0.9 0.8 0.9]
|
|
|
|
mean value: 0.86
|
|
|
|
key: train_accuracy
|
|
value: [0.98888889 0.98888889 0.97777778 0.98888889 0.98888889 0.97777778
|
|
0.97777778 0.97777778 0.96666667 0.97777778]
|
|
|
|
mean value: 0.9811111111111112
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 1. 0.75 0.90909091 0.88888889 0.83333333
|
|
0.72727273 0.88888889 0.8 0.88888889]
|
|
|
|
mean value: 0.8595454545454545
|
|
|
|
key: train_fscore
|
|
value: [0.98876404 0.98901099 0.97777778 0.98901099 0.98901099 0.97777778
|
|
0.97777778 0.97777778 0.96629213 0.97777778]
|
|
|
|
mean value: 0.9810978035697137
|
|
|
|
key: test_precision
|
|
value: [0.83333333 1. 1. 0.83333333 1. 0.71428571
|
|
0.66666667 1. 0.8 1. ]
|
|
|
|
mean value: 0.8847619047619047
|
|
|
|
key: train_precision
|
|
value: [1. 0.97826087 0.97777778 0.97826087 0.97826087 0.97777778
|
|
0.97777778 0.97777778 0.97727273 0.97777778]
|
|
|
|
mean value: 0.9800944224857269
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.6 1. 0.8 1. 0.8 0.8 0.8 0.8]
|
|
|
|
mean value: 0.86
|
|
|
|
key: train_recall
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[0.97777778 1. 0.97777778 1. 1. 0.97777778
|
|
0.97777778 0.97777778 0.95555556 0.97777778]
|
|
|
|
mean value: 0.9822222222222222
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 1. 0.8 0.9 0.9 0.8 0.7 0.9 0.8 0.9]
|
|
|
|
mean value: 0.86
|
|
|
|
key: train_roc_auc
|
|
value: [0.98888889 0.98888889 0.97777778 0.98888889 0.98888889 0.97777778
|
|
0.97777778 0.97777778 0.96666667 0.97777778]
|
|
|
|
mean value: 0.981111111111111
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 1. 0.6 0.83333333 0.8 0.71428571
|
|
0.57142857 0.8 0.66666667 0.8 ]
|
|
|
|
mean value: 0.7619047619047619
|
|
|
|
key: train_jcc
|
|
value: [0.97777778 0.97826087 0.95652174 0.97826087 0.97826087 0.95652174
|
|
0.95652174 0.95652174 0.93478261 0.95652174]
|
|
|
|
mean value: 0.9629951690821257
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02122283 0.0088222 0.00864434 0.00867176 0.00868058 0.00866842
|
|
0.00870466 0.00863767 0.00891924 0.00864077]
|
|
|
|
mean value: 0.009961247444152832
|
|
|
|
key: score_time
|
|
value: [0.01298022 0.00939631 0.00880861 0.00860405 0.0086031 0.00863481
|
|
0.00868678 0.00857902 0.00937891 0.00851655]
|
|
|
|
mean value: 0.009218835830688476
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.40824829 0. 0.40824829 0.40824829 0.6
|
|
0.40824829 0.2 0.40824829 0. ]
|
|
|
|
mean value: 0.3657738033247041
|
|
|
|
key: train_mcc
|
|
value: [0.62609903 0.58137767 0.60540551 0.58137767 0.62360956 0.71269665
|
|
0.60238451 0.58969198 0.6681531 0.68957028]
|
|
|
|
mean value: 0.6280365978811131
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.7 0.5 0.7 0.7 0.8 0.7 0.6 0.7 0.5]
|
|
|
|
mean value: 0.6799999999999999
|
|
|
|
key: train_accuracy
|
|
value: [0.81111111 0.78888889 0.8 0.78888889 0.81111111 0.85555556
|
|
0.8 0.78888889 0.83333333 0.84444444]
|
|
|
|
mean value: 0.8122222222222222
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.66666667 0.28571429 0.72727273 0.66666667 0.8
|
|
0.72727273 0.6 0.66666667 0.44444444]
|
|
|
|
mean value: 0.6493795093795094
|
|
|
|
key: train_fscore
|
|
value: [0.8 0.77647059 0.78571429 0.77647059 0.8045977 0.85057471
|
|
0.79069767 0.7654321 0.82758621 0.84090909]
|
|
|
|
mean value: 0.8018452946967656
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.75 0.5 0.66666667 0.75 0.8
|
|
0.66666667 0.6 0.75 0.5 ]
|
|
|
|
mean value: 0.6816666666666666
|
|
|
|
key: train_precision
|
|
value: [0.85 0.825 0.84615385 0.825 0.83333333 0.88095238
|
|
0.82926829 0.86111111 0.85714286 0.86046512]
|
|
|
|
mean value: 0.8468426937655525
|
|
|
|
key: test_recall
|
|
value: [1. 0.6 0.2 0.8 0.6 0.8 0.8 0.6 0.6 0.4]
|
|
|
|
mean value: 0.64
|
|
|
|
key: train_recall
|
|
value: [0.75555556 0.73333333 0.73333333 0.73333333 0.77777778 0.82222222
|
|
0.75555556 0.68888889 0.8 0.82222222]
|
|
|
|
mean value: 0.7622222222222222
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.7 0.5 0.7 0.7 0.8 0.7 0.6 0.7 0.5]
|
|
|
|
mean value: 0.68
|
|
|
|
key: train_roc_auc
|
|
value: [0.81111111 0.78888889 0.8 0.78888889 0.81111111 0.85555556
|
|
0.8 0.78888889 0.83333333 0.84444444]
|
|
|
|
mean value: 0.8122222222222222
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.5 0.16666667 0.57142857 0.5 0.66666667
|
|
0.57142857 0.42857143 0.5 0.28571429]
|
|
|
|
mean value: 0.5023809523809524
|
|
|
|
key: train_jcc
|
|
value: [0.66666667 0.63461538 0.64705882 0.63461538 0.67307692 0.74
|
|
0.65384615 0.62 0.70588235 0.7254902 ]
|
|
|
|
mean value: 0.6701251885369532
|
|
|
|
MCC on Blind test: 0.18
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.08190846 0.06687713 0.03293014 0.08744478 0.03286195 0.03449225
|
|
0.03584909 0.0576427 0.03408647 0.03423762]
|
|
|
|
mean value: 0.049833059310913086
|
|
|
|
key: score_time
|
|
value: [0.01158047 0.01002455 0.01006317 0.01050115 0.01028323 0.010041
|
|
0.01015139 0.01003408 0.00998139 0.01002693]
|
|
|
|
mean value: 0.010268735885620116
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.81649658 0.81649658 0.81649658 1. 1.
|
|
1. 0.81649658 0.81649658 1. ]
|
|
|
|
mean value: 0.8898979485566356
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.9 0.9 0.9 1. 1. 1. 0.9 0.9 1. ]
|
|
|
|
mean value: 0.9400000000000001
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.90909091 0.88888889 0.90909091 1. 1.
|
|
1. 0.90909091 0.90909091 1. ]
|
|
|
|
mean value: 0.9434343434343434
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.83333333 1. 0.83333333 1. 1.
|
|
1. 0.83333333 0.83333333 1. ]
|
|
|
|
mean value: 0.9166666666666667
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.8 1. 1. 1. 1. 1. 1. 1. ]
|
|
|
|
mean value: 0.98
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.9 0.9 0.9 1. 1. 1. 0.9 0.9 1. ]
|
|
|
|
mean value: 0.9400000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.83333333 0.8 0.83333333 1. 1.
|
|
1. 0.83333333 0.83333333 1. ]
|
|
|
|
mean value: 0.8966666666666667
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.84
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.0152812 0.02139306 0.04124451 0.04086208 0.04085636 0.04082632
|
|
0.04066205 0.04596925 0.04081917 0.04116321]
|
|
|
|
mean value: 0.0369077205657959
|
|
|
|
key: score_time
|
|
value: [0.01144218 0.01933622 0.0208571 0.02243543 0.01998782 0.02180576
|
|
0.01957369 0.02016187 0.02205682 0.01150918]
|
|
|
|
mean value: 0.018916606903076172
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.65465367 0.40824829 0.65465367 0.2 0.21821789
|
|
1. 0.40824829 0.40824829 0.40824829]
|
|
|
|
mean value: 0.5177014974435125
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 0.97801929 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9978019293843652
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.8 0.7 0.8 0.6 0.6 1. 0.7 0.7 0.7]
|
|
|
|
mean value: 0.75
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 0.98888889 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9988888888888889
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.83333333 0.66666667 0.83333333 0.6 0.66666667
|
|
1. 0.72727273 0.72727273 0.72727273]
|
|
|
|
mean value: 0.769090909090909
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 0.98876404 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.998876404494382
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.71428571 0.75 0.71428571 0.6 0.57142857
|
|
1. 0.66666667 0.66666667 0.66666667]
|
|
|
|
mean value: 0.7183333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.6 1. 0.6 0.8 1. 0.8 0.8 0.8]
|
|
|
|
mean value: 0.84
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 0.97777778 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9977777777777778
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.8 0.7 0.8 0.6 0.6 1. 0.7 0.7 0.7]
|
|
|
|
mean value: 0.75
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 0.98888889 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9988888888888889
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.71428571 0.5 0.71428571 0.42857143 0.5
|
|
1. 0.57142857 0.57142857 0.57142857]
|
|
|
|
mean value: 0.6404761904761904
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 0.97777778 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9977777777777778
|
|
|
|
MCC on Blind test: -0.1
|
|
|
|
Accuracy on Blind test: 0.48
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01152873 0.00863695 0.00857687 0.00819945 0.00833392 0.00825739
|
|
0.0083611 0.00821066 0.00823236 0.00819683]
|
|
|
|
mean value: 0.00865342617034912
|
|
|
|
key: score_time
|
|
value: [0.01131368 0.00865769 0.0086081 0.00834394 0.00836015 0.00839067
|
|
0.00829577 0.00829291 0.00828815 0.00827575]
|
|
|
|
mean value: 0.008682680130004884
|
|
|
|
key: test_mcc
|
|
value: [0.6 0.65465367 0. 0.40824829 0.40824829 0.40824829
|
|
0.40824829 0.2 0.40824829 0.5 ]
|
|
|
|
mean value: 0.3995895123027292
|
|
|
|
key: train_mcc
|
|
value: [0.55610507 0.49193496 0.60059347 0.51314236 0.55610507 0.57906602
|
|
0.57906602 0.53452248 0.60238451 0.60238451]
|
|
|
|
mean value: 0.5615304473142139
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.8 0.5 0.7 0.7 0.7 0.7 0.6 0.7 0.7]
|
|
|
|
mean value: 0.69
|
|
|
|
key: train_accuracy
|
|
value: [0.77777778 0.74444444 0.8 0.75555556 0.77777778 0.78888889
|
|
0.78888889 0.76666667 0.8 0.8 ]
|
|
|
|
mean value: 0.78
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.75 0.44444444 0.72727273 0.72727273 0.72727273
|
|
0.72727273 0.6 0.66666667 0.57142857]
|
|
|
|
mean value: 0.6741630591630591
|
|
|
|
key: train_fscore
|
|
value: [0.77272727 0.72941176 0.79545455 0.74418605 0.77272727 0.7816092
|
|
0.7816092 0.75862069 0.79069767 0.79069767]
|
|
|
|
mean value: 0.7717741331423581
|
|
|
|
key: test_precision
|
|
value: [0.8 1. 0.5 0.66666667 0.66666667 0.66666667
|
|
0.66666667 0.6 0.75 1. ]
|
|
|
|
mean value: 0.7316666666666667
|
|
|
|
key: train_precision
|
|
value: [0.79069767 0.775 0.81395349 0.7804878 0.79069767 0.80952381
|
|
0.80952381 0.78571429 0.82926829 0.82926829]
|
|
|
|
mean value: 0.801413513221511
|
|
|
|
key: test_recall
|
|
value: [0.8 0.6 0.4 0.8 0.8 0.8 0.8 0.6 0.6 0.4]
|
|
|
|
mean value: 0.66
|
|
|
|
key: train_recall
|
|
value: [0.75555556 0.68888889 0.77777778 0.71111111 0.75555556 0.75555556
|
|
0.75555556 0.73333333 0.75555556 0.75555556]
|
|
|
|
mean value: 0.7444444444444445
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.8 0.5 0.7 0.7 0.7 0.7 0.6 0.7 0.7]
|
|
|
|
mean value: 0.6900000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [0.77777778 0.74444444 0.8 0.75555556 0.77777778 0.78888889
|
|
0.78888889 0.76666667 0.8 0.8 ]
|
|
|
|
mean value: 0.78
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.6 0.28571429 0.57142857 0.57142857 0.57142857
|
|
0.57142857 0.42857143 0.5 0.4 ]
|
|
|
|
mean value: 0.5166666666666666
|
|
|
|
key: train_jcc
|
|
value: [0.62962963 0.57407407 0.66037736 0.59259259 0.62962963 0.64150943
|
|
0.64150943 0.61111111 0.65384615 0.65384615]
|
|
|
|
mean value: 0.628812557114444
|
|
|
|
MCC on Blind test: 0.05
|
|
|
|
Accuracy on Blind test: 0.57
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01000237 0.01344228 0.01320529 0.01263332 0.01426673 0.01389861
|
|
0.01293731 0.01380181 0.01295614 0.01367664]
|
|
|
|
mean value: 0.013082051277160644
|
|
|
|
key: score_time
|
|
value: [0.00835109 0.01133704 0.01144195 0.01147938 0.01144743 0.01136518
|
|
0.011235 0.0112555 0.01120543 0.01124072]
|
|
|
|
mean value: 0.011035871505737305
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.81649658 0.40824829 0.40824829 0.81649658 0.5
|
|
0.81649658 0.65465367 0.81649658 1. ]
|
|
|
|
mean value: 0.7053633156274334
|
|
|
|
key: train_mcc
|
|
value: [1. 0.95650071 0.89442719 0.91201231 0.91473203 0.95650071
|
|
0.93356387 0.93541435 0.95555556 0.97801929]
|
|
|
|
mean value: 0.9436726030863619
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.9 0.7 0.7 0.9 0.7 0.9 0.8 0.9 1. ]
|
|
|
|
mean value: 0.84
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.97777778 0.94444444 0.95555556 0.95555556 0.97777778
|
|
0.96666667 0.96666667 0.97777778 0.98888889]
|
|
|
|
mean value: 0.9711111111111111
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.90909091 0.66666667 0.72727273 0.90909091 0.76923077
|
|
0.88888889 0.83333333 0.88888889 1. ]
|
|
|
|
mean value: 0.8501554001554001
|
|
|
|
key: train_fscore
|
|
value: [1. 0.97727273 0.94736842 0.95652174 0.95744681 0.97826087
|
|
0.96703297 0.96774194 0.97777778 0.98901099]
|
|
|
|
mean value: 0.9718434234837254
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.83333333 0.75 0.66666667 0.83333333 0.625
|
|
1. 0.71428571 1. 1. ]
|
|
|
|
mean value: 0.825595238095238
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.9 0.93617021 0.91836735 0.95744681
|
|
0.95652174 0.9375 0.97777778 0.97826087]
|
|
|
|
mean value: 0.9562044754688801
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.6 0.8 1. 1. 0.8 1. 0.8 1. ]
|
|
|
|
mean value: 0.9
|
|
|
|
key: train_recall
|
|
value: [1. 0.95555556 1. 0.97777778 1. 1.
|
|
0.97777778 1. 0.97777778 1. ]
|
|
|
|
mean value: 0.9888888888888889
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.9 0.7 0.7 0.9 0.7 0.9 0.8 0.9 1. ]
|
|
|
|
mean value: 0.8400000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.97777778 0.94444444 0.95555556 0.95555556 0.97777778
|
|
0.96666667 0.96666667 0.97777778 0.98888889]
|
|
|
|
mean value: 0.9711111111111111
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.83333333 0.5 0.57142857 0.83333333 0.625
|
|
0.8 0.71428571 0.8 1. ]
|
|
|
|
mean value: 0.7510714285714286
|
|
|
|
key: train_jcc
|
|
value: [1. 0.95555556 0.9 0.91666667 0.91836735 0.95744681
|
|
0.93617021 0.9375 0.95652174 0.97826087]
|
|
|
|
mean value: 0.9456489199133246
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01229525 0.01237893 0.01234698 0.01202965 0.0121994 0.01249433
|
|
0.01228952 0.01218224 0.01196837 0.01224113]
|
|
|
|
mean value: 0.012242579460144043
|
|
|
|
key: score_time
|
|
value: [0.01024342 0.01122355 0.0113194 0.01121879 0.01136684 0.01144981
|
|
0.01120973 0.01118469 0.0115819 0.01125216]
|
|
|
|
mean value: 0.011205029487609864
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.81649658 0.21821789 0.6 0.33333333 0.40824829
|
|
0.81649658 0.81649658 0. 0.81649658]
|
|
|
|
mean value: 0.5642282418671819
|
|
|
|
key: train_mcc
|
|
value: [1. 0.95650071 0.97801929 0.8675239 0.74278135 0.97801929
|
|
0.88910845 0.89442719 0.48257301 0.91111111]
|
|
|
|
mean value: 0.8700064322418951
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.9 0.6 0.8 0.6 0.7 0.9 0.9 0.5 0.9]
|
|
|
|
mean value: 0.77
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.97777778 0.98888889 0.93333333 0.85555556 0.98888889
|
|
0.94444444 0.94444444 0.68888889 0.95555556]
|
|
|
|
mean value: 0.9277777777777778
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.90909091 0.5 0.8 0.33333333 0.72727273
|
|
0.88888889 0.90909091 0. 0.90909091]
|
|
|
|
mean value: 0.6885858585858586
|
|
|
|
key: train_fscore
|
|
value: [1. 0.97826087 0.98876404 0.93181818 0.83116883 0.98876404
|
|
0.94505495 0.94736842 0.5483871 0.95555556]
|
|
|
|
mean value: 0.9115141990877197
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.83333333 0.66666667 0.8 1. 0.66666667
|
|
1. 0.83333333 0. 0.83333333]
|
|
|
|
mean value: 0.7466666666666667
|
|
|
|
key: train_precision
|
|
value: [1. 0.95744681 1. 0.95348837 1. 1.
|
|
0.93478261 0.9 1. 0.95555556]
|
|
|
|
mean value: 0.9701273344854869
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.4 0.8 0.2 0.8 0.8 1. 0. 1. ]
|
|
|
|
mean value: 0.7000000000000001
|
|
|
|
key: train_recall
|
|
value: [1. 1. 0.97777778 0.91111111 0.71111111 0.97777778
|
|
0.95555556 1. 0.37777778 0.95555556]
|
|
|
|
mean value: 0.8866666666666667
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.9 0.6 0.8 0.6 0.7 0.9 0.9 0.5 0.9]
|
|
|
|
mean value: 0.77
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.97777778 0.98888889 0.93333333 0.85555556 0.98888889
|
|
0.94444444 0.94444444 0.68888889 0.95555556]
|
|
|
|
mean value: 0.9277777777777778
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.83333333 0.33333333 0.66666667 0.2 0.57142857
|
|
0.8 0.83333333 0. 0.83333333]
|
|
|
|
mean value: 0.5904761904761905
|
|
|
|
key: train_jcc
|
|
value: [1. 0.95744681 0.97777778 0.87234043 0.71111111 0.97777778
|
|
0.89583333 0.9 0.37777778 0.91489362]
|
|
|
|
mean value: 0.8584958628841608
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09080362 0.08026624 0.0802474 0.07980943 0.07940364 0.08048773
|
|
0.07949638 0.07976413 0.07984233 0.07969499]
|
|
|
|
mean value: 0.08098158836364747
|
|
|
|
key: score_time
|
|
value: [0.01442552 0.01436472 0.01435137 0.01420498 0.01422119 0.01444697
|
|
0.01426816 0.01428533 0.01425791 0.01430011]
|
|
|
|
mean value: 0.01431262493133545
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.65465367 0.81649658 0.81649658 0.81649658 0.81649658
|
|
1. 0.81649658 0.65465367 1. ]
|
|
|
|
mean value: 0.8208286826982311
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.8 0.9 0.9 0.9 0.9 1. 0.9 0.8 1. ]
|
|
|
|
mean value: 0.9
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.83333333 0.88888889 0.90909091 0.88888889 0.90909091
|
|
1. 0.90909091 0.83333333 1. ]
|
|
|
|
mean value: 0.908080808080808
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.71428571 1. 0.83333333 1. 0.83333333
|
|
1. 0.83333333 0.71428571 1. ]
|
|
|
|
mean value: 0.8761904761904762
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.8 1. 0.8 1. 1. 1. 1. 1. ]
|
|
|
|
mean value: 0.96
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.8 0.9 0.9 0.9 0.9 1. 0.9 0.8 1. ]
|
|
|
|
mean value: 0.9
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.71428571 0.8 0.83333333 0.8 0.83333333
|
|
1. 0.83333333 0.71428571 1. ]
|
|
|
|
mean value: 0.8361904761904763
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.78
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02699709 0.03070116 0.03277135 0.05133557 0.02615666 0.02731848
|
|
0.047997 0.03133965 0.03537989 0.04155445]
|
|
|
|
mean value: 0.03515512943267822
|
|
|
|
key: score_time
|
|
value: [0.02213979 0.02183247 0.03555799 0.03083539 0.01692629 0.02012992
|
|
0.01762462 0.02948856 0.03649259 0.02368855]
|
|
|
|
mean value: 0.0254716157913208
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.65465367 0.81649658 0.81649658 1. 1.
|
|
1. 0.81649658 0.81649658 1. ]
|
|
|
|
mean value: 0.8737136575346607
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 0.97801929 1.
|
|
1. 1. 0.97801929 1. ]
|
|
|
|
mean value: 0.9956038587687303
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.8 0.9 0.9 1. 1. 1. 0.9 0.9 1. ]
|
|
|
|
mean value: 0.93
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 0.98888889 1.
|
|
1. 1. 0.98888889 1. ]
|
|
|
|
mean value: 0.9977777777777778
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.83333333 0.88888889 0.90909091 1. 1.
|
|
1. 0.90909091 0.90909091 1. ]
|
|
|
|
mean value: 0.9358585858585858
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 0.98901099 1.
|
|
1. 1. 0.98901099 1. ]
|
|
|
|
mean value: 0.9978021978021978
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.71428571 1. 0.83333333 1. 1.
|
|
1. 0.83333333 0.83333333 1. ]
|
|
|
|
mean value: 0.9047619047619048
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 0.97826087 1.
|
|
1. 1. 0.97826087 1. ]
|
|
|
|
mean value: 0.9956521739130435
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.8 1. 1. 1. 1. 1. 1. 1. ]
|
|
|
|
mean value: 0.98
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.8 0.9 0.9 1. 1. 1. 0.9 0.9 1. ]
|
|
|
|
mean value: 0.93
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 0.98888889 1.
|
|
1. 1. 0.98888889 1. ]
|
|
|
|
mean value: 0.9977777777777778
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.71428571 0.8 0.83333333 1. 1.
|
|
1. 0.83333333 0.83333333 1. ]
|
|
|
|
mean value: 0.8847619047619047
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 0.97826087 1.
|
|
1. 1. 0.97826087 1. ]
|
|
|
|
mean value: 0.9956521739130435
|
|
|
|
MCC on Blind test: 0.89
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01359749 0.01574779 0.01587391 0.02081776 0.01601291 0.01616716
|
|
0.01599646 0.01598001 0.01611137 0.01597762]
|
|
|
|
mean value: 0.016228246688842773
|
|
|
|
key: score_time
|
|
value: [0.01142287 0.01155448 0.01172853 0.01179075 0.01171851 0.01176023
|
|
0.01172566 0.01173353 0.01173139 0.01173902]
|
|
|
|
mean value: 0.011690497398376465
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.65465367 0.40824829 0.6 0.5 0.40824829
|
|
0.40824829 0.40824829 0.6 0.81649658]
|
|
|
|
mean value: 0.5620639994418881
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.8 0.7 0.8 0.7 0.7 0.7 0.7 0.8 0.9]
|
|
|
|
mean value: 0.77
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.83333333 0.66666667 0.8 0.57142857 0.72727273
|
|
0.72727273 0.72727273 0.8 0.90909091]
|
|
|
|
mean value: 0.7671428571428571
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.71428571 0.75 0.8 1. 0.66666667
|
|
0.66666667 0.66666667 0.8 0.83333333]
|
|
|
|
mean value: 0.7730952380952381
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.6 0.8 0.4 0.8 0.8 0.8 0.8 1. ]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.8 0.7 0.8 0.7 0.7 0.7 0.7 0.8 0.9]
|
|
|
|
mean value: 0.77
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.71428571 0.5 0.66666667 0.4 0.57142857
|
|
0.57142857 0.57142857 0.66666667 0.83333333]
|
|
|
|
mean value: 0.6328571428571429
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.01
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.22217441 0.19093752 0.1718936 0.16672945 0.18235731 0.17839694
|
|
0.1745894 0.18664312 0.18885684 0.19141936]
|
|
|
|
mean value: 0.18539979457855224
|
|
|
|
key: score_time
|
|
value: [0.00920916 0.00894237 0.00916672 0.00911665 0.00934219 0.00979352
|
|
0.0089221 0.00914454 0.00932813 0.00907612]
|
|
|
|
mean value: 0.00920414924621582
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.81649658 0.81649658 0.65465367 0.65465367 1.
|
|
1. 0.81649658 0.81649658 0.81649658]
|
|
|
|
mean value: 0.8208286826982311
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.9 0.9 0.8 0.8 1. 1. 0.9 0.9 0.9]
|
|
|
|
mean value: 0.9
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.90909091 0.88888889 0.83333333 0.75 1.
|
|
1. 0.90909091 0.90909091 0.88888889]
|
|
|
|
mean value: 0.8997474747474747
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.83333333 1. 0.71428571 1. 1.
|
|
1. 0.83333333 0.83333333 1. ]
|
|
|
|
mean value: 0.9047619047619048
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.8 1. 0.6 1. 1. 1. 1. 0.8]
|
|
|
|
mean value: 0.92
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.9 0.9 0.8 0.8 1. 1. 0.9 0.9 0.9]
|
|
|
|
mean value: 0.9
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.83333333 0.8 0.71428571 0.6 1.
|
|
1. 0.83333333 0.83333333 0.8 ]
|
|
|
|
mean value: 0.8247619047619048
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.84
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01358581 0.01411629 0.01471233 0.01414609 0.02364945 0.02104473
|
|
0.01440454 0.01459599 0.01464772 0.01447725]
|
|
|
|
mean value: 0.01593801975250244
|
|
|
|
key: score_time
|
|
value: [0.01189876 0.01186109 0.01198363 0.0119555 0.01537371 0.01311707
|
|
0.01196265 0.01466823 0.01172829 0.01508021]
|
|
|
|
mean value: 0.012962913513183594
|
|
|
|
key: test_mcc
|
|
value: [1. 1. 0.5 0.81649658 0.5 0.81649658
|
|
0.81649658 0.81649658 0.81649658 0.65465367]
|
|
|
|
mean value: 0.7737136575346607
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 1. 0.7 0.9 0.7 0.9 0.9 0.9 0.9 0.8]
|
|
|
|
mean value: 0.87
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 1. 0.57142857 0.88888889 0.57142857 0.88888889
|
|
0.88888889 0.88888889 0.88888889 0.75 ]
|
|
|
|
mean value: 0.8337301587301588
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.4 0.8 0.4 0.8 0.8 0.8 0.8 0.6]
|
|
|
|
mean value: 0.74
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 1. 0.7 0.9 0.7 0.9 0.9 0.9 0.9 0.8]
|
|
|
|
mean value: 0.87
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 1. 0.4 0.8 0.4 0.8 0.8 0.8 0.8 0.6]
|
|
|
|
mean value: 0.74
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0334847 0.03210235 0.03213644 0.03319669 0.03299642 0.03233814
|
|
0.0345664 0.03212976 0.03434682 0.03329611]
|
|
|
|
mean value: 0.03305938243865967
|
|
|
|
key: score_time
|
|
value: [0.01982021 0.02007699 0.02197194 0.02256417 0.02357554 0.0200212
|
|
0.0116353 0.02040458 0.02340674 0.0233531 ]
|
|
|
|
mean value: 0.020682978630065917
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 0.81649658 0.40824829 0.6 0.81649658 0.5
|
|
0.81649658 0.65465367 0.6 1. ]
|
|
|
|
mean value: 0.6867045374662996
|
|
|
|
key: train_mcc
|
|
value: [1. 0.97801929 0.97801929 0.97801929 0.97801929 0.97801929
|
|
0.97801929 0.97801929 0.97801929 0.97801929]
|
|
|
|
mean value: 0.9802173644592863
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.9 0.7 0.8 0.9 0.7 0.9 0.8 0.8 1. ]
|
|
|
|
mean value: 0.8300000000000001
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.98888889 0.98888889 0.98888889 0.98888889 0.98888889
|
|
0.98888889 0.98888889 0.98888889 0.98888889]
|
|
|
|
mean value: 0.99
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.90909091 0.66666667 0.8 0.90909091 0.76923077
|
|
0.88888889 0.83333333 0.8 1. ]
|
|
|
|
mean value: 0.840963480963481
|
|
|
|
key: train_fscore
|
|
value: [1. 0.98901099 0.98901099 0.98901099 0.98901099 0.98901099
|
|
0.98901099 0.98901099 0.98901099 0.98901099]
|
|
|
|
mean value: 0.9901098901098901
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.83333333 0.75 0.8 0.83333333 0.625
|
|
1. 0.71428571 0.8 1. ]
|
|
|
|
mean value: 0.8070238095238096
|
|
|
|
key: train_precision
|
|
value: [1. 0.97826087 0.97826087 0.97826087 0.97826087 0.97826087
|
|
0.97826087 0.97826087 0.97826087 0.97826087]
|
|
|
|
mean value: 0.9804347826086957
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.6 0.8 1. 1. 0.8 1. 0.8 1. ]
|
|
|
|
mean value: 0.9
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.9 0.7 0.8 0.9 0.7 0.9 0.8 0.8 1. ]
|
|
|
|
mean value: 0.8300000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.98888889 0.98888889 0.98888889 0.98888889 0.98888889
|
|
0.98888889 0.98888889 0.98888889 0.98888889]
|
|
|
|
mean value: 0.99
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.83333333 0.5 0.66666667 0.83333333 0.625
|
|
0.8 0.71428571 0.66666667 1. ]
|
|
|
|
mean value: 0.7353571428571428
|
|
|
|
key: train_jcc
|
|
value: [1. 0.97826087 0.97826087 0.97826087 0.97826087 0.97826087
|
|
0.97826087 0.97826087 0.97826087 0.97826087]
|
|
|
|
mean value: 0.9804347826086957
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:196: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:199: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.10436153 0.10901594 0.15473795 0.18076968 0.18270421 0.19743443
|
|
0.19927812 0.20212865 0.22301292 0.22878146]
|
|
|
|
mean value: 0.17822248935699464
|
|
|
|
key: score_time
|
|
value: [0.0118041 0.02067137 0.01172853 0.02039146 0.02330875 0.02281046
|
|
0.02109528 0.02306652 0.0215745 0.02139878]
|
|
|
|
mean value: 0.019784975051879882
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 0.81649658 0.40824829 0.6 0.81649658 0.5
|
|
0.81649658 0.65465367 0.21821789 1. ]
|
|
|
|
mean value: 0.6485263264898988
|
|
|
|
key: train_mcc
|
|
value: [1. 0.97801929 0.97801929 0.97801929 0.97801929 0.97801929
|
|
0.97801929 0.97801929 1. 0.97801929]
|
|
|
|
mean value: 0.9824154350749212
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.9 0.7 0.8 0.9 0.7 0.9 0.8 0.6 1. ]
|
|
|
|
mean value: 0.81
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.98888889 0.98888889 0.98888889 0.98888889 0.98888889
|
|
0.98888889 0.98888889 1. 0.98888889]
|
|
|
|
mean value: 0.9911111111111112
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.90909091 0.66666667 0.8 0.90909091 0.76923077
|
|
0.88888889 0.83333333 0.66666667 1. ]
|
|
|
|
mean value: 0.8276301476301476
|
|
|
|
key: train_fscore
|
|
value: [1. 0.98901099 0.98901099 0.98901099 0.98901099 0.98901099
|
|
0.98901099 0.98901099 1. 0.98901099]
|
|
|
|
mean value: 0.9912087912087912
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.83333333 0.75 0.8 0.83333333 0.625
|
|
1. 0.71428571 0.57142857 1. ]
|
|
|
|
mean value: 0.7841666666666667
|
|
|
|
key: train_precision
|
|
value: [1. 0.97826087 0.97826087 0.97826087 0.97826087 0.97826087
|
|
0.97826087 0.97826087 1. 0.97826087]
|
|
|
|
mean value: 0.9826086956521739
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.6 0.8 1. 1. 0.8 1. 0.8 1. ]
|
|
|
|
mean value: 0.9
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.9 0.7 0.8 0.9 0.7 0.9 0.8 0.6 1. ]
|
|
|
|
mean value: 0.81
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.98888889 0.98888889 0.98888889 0.98888889 0.98888889
|
|
0.98888889 0.98888889 1. 0.98888889]
|
|
|
|
mean value: 0.991111111111111
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.83333333 0.5 0.66666667 0.83333333 0.625
|
|
0.8 0.71428571 0.5 1. ]
|
|
|
|
mean value: 0.7186904761904762
|
|
|
|
key: train_jcc
|
|
value: [1. 0.97826087 0.97826087 0.97826087 0.97826087 0.97826087
|
|
0.97826087 0.97826087 1. 0.97826087]
|
|
|
|
mean value: 0.9826086956521739
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.72
|