25421 lines
1.3 MiB
25421 lines
1.3 MiB
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_7030.py:549: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
|
|
from pandas import MultiIndex, Int64Index
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
1.22.4
|
|
1.4.1
|
|
|
|
aaindex_df contains non-numerical data
|
|
|
|
Total no. of non-numerial columns: 2
|
|
|
|
Selecting numerical data only
|
|
|
|
PASS: successfully selected numerical columns only for aaindex_df
|
|
|
|
Now checking for NA in the remaining aaindex_cols
|
|
|
|
Counting aaindex_df cols with NA
|
|
ncols with NA: 4 columns
|
|
Dropping these...
|
|
Original ncols: 127
|
|
|
|
Revised df ncols: 123
|
|
|
|
Checking NA in revised df...
|
|
|
|
PASS: cols with NA successfully dropped from aaindex_df
|
|
Proceeding with combining aa_df with other features_df
|
|
|
|
PASS: ncols match
|
|
Expected ncols: 123
|
|
Got: 123
|
|
|
|
Total no. of columns in clean aa_df: 123
|
|
|
|
Proceeding to merge, expected nrows in merged_df: 531
|
|
|
|
PASS: my_features_df and aa_df successfully combined
|
|
nrows: 531
|
|
ncols: 286
|
|
count of NULL values before imputation
|
|
|
|
or_mychisq 263
|
|
log10_or_mychisq 263
|
|
dtype: int64
|
|
count of NULL values AFTER imputation
|
|
|
|
mutationinformation 0
|
|
or_rawI 0
|
|
logorI 0
|
|
dtype: int64
|
|
|
|
PASS: OR values imputed, data ready for ML
|
|
|
|
Total no. of features for aaindex: 123
|
|
|
|
No. of numerical features: 167
|
|
No. of categorical features: 7
|
|
|
|
PASS: x_features has no target variable
|
|
|
|
No. of columns for x_features: 174
|
|
|
|
-------------------------------------------------------------
|
|
Successfully split data with stratification: 70/30
|
|
Train data size: (79, 174)
|
|
Test data size: (40, 174)
|
|
y_train numbers: Counter({0: 50, 1: 29})
|
|
y_train ratio: 1.7241379310344827
|
|
|
|
y_test_numbers: Counter({0: 26, 1: 14})
|
|
y_test ratio: 1.8571428571428572
|
|
-------------------------------------------------------------
|
|
|
|
Simple Random OverSampling
|
|
Counter({1: 50, 0: 50})
|
|
(100, 174)
|
|
|
|
Simple Random UnderSampling
|
|
Counter({0: 29, 1: 29})
|
|
(58, 174)
|
|
|
|
Simple Combined Over and UnderSampling
|
|
Counter({0: 50, 1: 50})
|
|
(100, 174)
|
|
|
|
SMOTE_NC OverSampling
|
|
Counter({1: 50, 0: 50})
|
|
(100, 174)
|
|
|
|
#####################################################################
|
|
|
|
Running ML analysis: 70/30 split
|
|
Gene name: gid
|
|
Drug name: streptomycin
|
|
|
|
Output directory: /home/tanu/git/Data/streptomycin/output/ml/tts_7030/
|
|
Sanity checks:
|
|
ML source data size: (119, 174)
|
|
Total input features: (79, 174)
|
|
Target feature numbers: Counter({0: 50, 1: 29})
|
|
Target features ratio: 1.7241379310344827
|
|
|
|
#####################################################################
|
|
|
|
|
|
================================================================
|
|
|
|
Strucutral features (n): 35
|
|
These are:
|
|
Common stablity features: ['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'mcsm_na_affinity']
|
|
FoldX columns: ['electro_rr', 'electro_mm', 'electro_sm', 'electro_ss', 'disulfide_rr', 'disulfide_mm', 'disulfide_sm', 'disulfide_ss', 'hbonds_rr', 'hbonds_mm', 'hbonds_sm', 'hbonds_ss', 'partcov_rr', 'partcov_mm', 'partcov_sm', 'partcov_ss', 'vdwclashes_rr', 'vdwclashes_mm', 'vdwclashes_sm', 'vdwclashes_ss', 'volumetric_rr', 'volumetric_mm', 'volumetric_ss']
|
|
Other struc columns: ['rsa', 'kd_values', 'rd_values']
|
|
================================================================
|
|
|
|
AAindex features (n): 123
|
|
================================================================
|
|
|
|
Evolutionary features (n): 3
|
|
These are:
|
|
['consurf_score', 'snap2_score', 'provean_score']
|
|
================================================================
|
|
|
|
Genomic features (n): 6
|
|
These are:
|
|
['maf', 'logorI']
|
|
['lineage_proportion', 'dist_lineage_proportion', 'lineage_count_all', 'lineage_count_unique']
|
|
================================================================
|
|
|
|
Categorical features (n): 7
|
|
These are:
|
|
['ss_class', 'aa_prop_change', 'electrostatics_change', 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site']
|
|
================================================================
|
|
|
|
|
|
Pass: No. of features match
|
|
|
|
#####################################################################
|
|
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02347183 0.02686572 0.0307076 0.02450418 0.02523112 0.02600336
|
|
0.02457237 0.02707028 0.02396798 0.02385616]
|
|
|
|
mean value: 0.025625061988830567
|
|
|
|
key: score_time
|
|
value: [0.01185417 0.00754213 0.01175284 0.01153183 0.01146317 0.01155972
|
|
0.01150918 0.01154113 0.01146698 0.01157117]
|
|
|
|
mean value: 0.011179232597351074
|
|
|
|
key: test_mcc
|
|
value: [ 0.48795004 nan 0.46666667 0.46666667 0.74535599 0.6
|
|
0.77459667 -0.29277002 0.74535599 0.09128709]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.91067388 0.88152145 0.90865445 0.90865445 0.90865445 0.87863248
|
|
0.84744528 0.87830162 0.87830162 0.91085367]
|
|
|
|
mean value: 0.8911693370709752
|
|
|
|
key: test_accuracy
|
|
value: [0.75 nan 0.75 0.75 0.875 0.75
|
|
0.875 0.5 0.875 0.57142857]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.95774648 0.94366197 0.95774648 0.95774648 0.95774648 0.94366197
|
|
0.92957746 0.94366197 0.94366197 0.95833333]
|
|
|
|
mean value: 0.9493544600938967
|
|
|
|
key: test_fscore
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.5 nan 0.66666667 0.66666667 0.8 0.75
|
|
0.85714286 0. 0.8 0.4 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.93877551 0.91666667 0.94117647 0.94117647 0.94117647 0.92307692
|
|
0.90196078 0.92 0.92 0.94339623]
|
|
|
|
mean value: 0.9287405522441197
|
|
|
|
key: test_precision
|
|
value: [1. nan 0.66666667 0.66666667 1. 0.6
|
|
0.75 0. 1. 0.33333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.96 0.96 0.96 0.92307692
|
|
0.92 0.95833333 0.95833333 0.96153846]
|
|
|
|
mean value: 0.9601282051282052
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.66666667 0.66666667 0.66666667 1.
|
|
1. 0. 0.66666667 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.88461538 0.84615385 0.92307692 0.92307692 0.92307692 0.92307692
|
|
0.88461538 0.88461538 0.88461538 0.92592593]
|
|
|
|
mean value: 0.9002849002849003
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 nan 0.73333333 0.73333333 0.83333333 0.8
|
|
0.9 0.4 0.83333333 0.55 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.94230769 0.92307692 0.95042735 0.95042735 0.95042735 0.93931624
|
|
0.92008547 0.93119658 0.93119658 0.95185185]
|
|
|
|
mean value: 0.9390313390313391
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 nan 0.5 0.5 0.66666667 0.6
|
|
0.75 0. 0.66666667 0.25 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.88461538 0.84615385 0.88888889 0.88888889 0.88888889 0.85714286
|
|
0.82142857 0.85185185 0.85185185 0.89285714]
|
|
|
|
mean value: 0.8672568172568172
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.53741789 0.69267988 0.48575902 0.56827617 0.53399444 0.59227157
|
|
0.52956486 0.53589177 0.53715754 0.65834451]
|
|
|
|
mean value: 0.5671357631683349
|
|
|
|
key: score_time
|
|
value: [0.01202703 0.00642514 0.01250315 0.01604509 0.01320004 0.01179504
|
|
0.01307392 0.01395082 0.01368642 0.01708794]
|
|
|
|
mean value: 0.012979459762573243
|
|
|
|
key: test_mcc
|
|
value: [0.1490712 nan 0.46666667 0.46666667 0.46666667 0.6
|
|
0.6 0.1490712 0.46666667 0.54772256]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.82385255 1. 0.96986363 0.9703421 1. 0.93931624
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9703374516508763
|
|
|
|
key: test_accuracy
|
|
value: [0.625 nan 0.75 0.75 0.75 0.75
|
|
0.75 0.625 0.75 0.71428571]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.91549296 1. 0.98591549 0.98591549 1. 0.97183099
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9859154929577465
|
|
|
|
key: test_fscore
|
|
value: [0.4 nan 0.66666667 0.66666667 0.66666667 0.75
|
|
0.75 0.4 0.66666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.86956522 1. 0.98039216 0.98113208 1. 0.96153846
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9792627911264209
|
|
|
|
key: test_precision
|
|
value: [0.5 nan 0.66666667 0.66666667 0.66666667 0.6
|
|
0.6 0.5 0.66666667 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 0.96296296 1. 0.96153846
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9924501424501424
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.66666667 0.66666667 0.66666667 1.
|
|
1. 0.33333333 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.76923077 1. 0.96153846 1. 1. 0.96153846
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9692307692307692
|
|
|
|
key: test_roc_auc
|
|
value: [0.56666667 nan 0.73333333 0.73333333 0.73333333 0.8
|
|
0.8 0.56666667 0.73333333 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.88461538 1. 0.98076923 0.98888889 1. 0.96965812
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9823931623931624
|
|
|
|
key: test_jcc
|
|
value: [0.25 nan 0.5 0.5 0.5 0.6 0.6 0.25 0.5 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.76923077 1. 0.96153846 0.96296296 1. 0.92592593
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.961965811965812
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01300025 0.01131201 0.01010251 0.00901103 0.00829911 0.00847673
|
|
0.00845861 0.00850534 0.00863886 0.00855303]
|
|
|
|
mean value: 0.009435749053955078
|
|
|
|
key: score_time
|
|
value: [0.01224828 0.00450897 0.00966024 0.00870657 0.00878048 0.00860023
|
|
0.00848126 0.00857925 0.00853801 0.00865126]
|
|
|
|
mean value: 0.008675456047058105
|
|
|
|
key: test_mcc
|
|
value: [-0.06666667 nan 0.06666667 -0.25819889 0. 0.6
|
|
0.06666667 -0.06666667 -0.46666667 0.54772256]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.63589744 0.3217793 0.4760037 0.61337378 0.48136848 0.59111411
|
|
0.59101806 0.61021596 0.61021596 0.61560271]
|
|
|
|
mean value: 0.554658949505139
|
|
|
|
key: test_accuracy
|
|
value: [0.5 nan 0.5 0.375 0.375 0.75
|
|
0.5 0.5 0.25 0.71428571]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.83098592 0.54929577 0.64788732 0.76056338 0.67605634 0.77464789
|
|
0.76056338 0.77464789 0.77464789 0.77777778]
|
|
|
|
mean value: 0.7327073552425665
|
|
|
|
key: test_fscore
|
|
value: [0.33333333 nan 0.5 0.28571429 0.54545455 0.75
|
|
0.5 0.33333333 0.25 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.76923077 0.6097561 0.67532468 0.75362319 0.68493151 0.75
|
|
0.74626866 0.75757576 0.75757576 0.76470588]
|
|
|
|
mean value: 0.7268992291592407
|
|
|
|
key: test_precision
|
|
value: [0.33333333 nan 0.4 0.25 0.375 0.6
|
|
0.4 0.33333333 0.2 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.76923077 0.44642857 0.50980392 0.60465116 0.53191489 0.63157895
|
|
0.6097561 0.625 0.625 0.63414634]
|
|
|
|
mean value: 0.5987510705028498
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.66666667 0.33333333 1. 1.
|
|
0.66666667 0.33333333 0.33333333 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.76923077 0.96153846 1. 1. 0.96153846 0.92307692
|
|
0.96153846 0.96153846 0.96153846 0.96296296]
|
|
|
|
mean value: 0.9462962962962963
|
|
|
|
key: test_roc_auc
|
|
value: [0.46666667 nan 0.53333333 0.36666667 0.5 0.8
|
|
0.53333333 0.46666667 0.26666667 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.81794872 0.63632479 0.72222222 0.81111111 0.73632479 0.80598291
|
|
0.80299145 0.81410256 0.81410256 0.81481481]
|
|
|
|
mean value: 0.7775925925925926
|
|
|
|
key: test_jcc
|
|
value: [0.2 nan 0.33333333 0.16666667 0.375 0.6
|
|
0.33333333 0.2 0.14285714 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.625 0.43859649 0.50980392 0.60465116 0.52083333 0.6
|
|
0.5952381 0.6097561 0.6097561 0.61904762]
|
|
|
|
mean value: 0.5732682818328394
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.57
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00992274 0.00908804 0.01092887 0.00855708 0.00939322 0.00976706
|
|
0.00851512 0.00860643 0.00846624 0.00871468]
|
|
|
|
mean value: 0.009195947647094726
|
|
|
|
key: score_time
|
|
value: [0.01040363 0.00474834 0.00870204 0.00912285 0.00954747 0.0092864
|
|
0.00849009 0.00853539 0.00894189 0.00856447]
|
|
|
|
mean value: 0.008634257316589355
|
|
|
|
key: test_mcc
|
|
value: [-0.29277002 nan 0.1490712 0. 0.46666667 0.25819889
|
|
-0.29277002 -0.29277002 0.1490712 -0.25819889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.50503962 0.50503962 0.56963094 0.56963094 0.4660252 0.532629
|
|
0.53764379 0.53764379 0.4660252 0.621059 ]
|
|
|
|
mean value: 0.5310367097131722
|
|
|
|
key: test_accuracy
|
|
value: [0.5 nan 0.625 0.625 0.75 0.625
|
|
0.5 0.5 0.625 0.57142857]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.77464789 0.77464789 0.8028169 0.8028169 0.76056338 0.78873239
|
|
0.78873239 0.78873239 0.76056338 0.81944444]
|
|
|
|
mean value: 0.7861697965571205
|
|
|
|
key: test_fscore
|
|
value: [0. nan 0.4 0. 0.66666667 0.57142857
|
|
0. 0. 0.4 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.6 0.6 0.66666667 0.66666667 0.58536585 0.65116279
|
|
0.63414634 0.63414634 0.58536585 0.69767442]
|
|
|
|
mean value: 0.6321194932879561
|
|
|
|
key: test_precision
|
|
value: [0. nan 0.5 0. 0.66666667 0.5
|
|
0. 0. 0.5 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.85714286 0.85714286 0.875 0.875 0.8 0.82352941
|
|
0.86666667 0.86666667 0.8 0.9375 ]
|
|
|
|
mean value: 0.8558648459383753
|
|
|
|
key: test_recall
|
|
value: [0. nan 0.33333333 0. 0.66666667 0.66666667
|
|
0. 0. 0.33333333 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.46153846 0.46153846 0.53846154 0.53846154 0.46153846 0.53846154
|
|
0.5 0.5 0.46153846 0.55555556]
|
|
|
|
mean value: 0.5017094017094017
|
|
|
|
key: test_roc_auc
|
|
value: [0.4 nan 0.56666667 0.5 0.73333333 0.63333333
|
|
0.4 0.4 0.56666667 0.4 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.70854701 0.70854701 0.74700855 0.74700855 0.6974359 0.73589744
|
|
0.72777778 0.72777778 0.6974359 0.76666667]
|
|
|
|
mean value: 0.7264102564102564
|
|
|
|
key: test_jcc
|
|
value: [0. nan 0.25 0. 0.5 0.4 0. 0. 0.25 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.42857143 0.42857143 0.5 0.5 0.4137931 0.48275862
|
|
0.46428571 0.46428571 0.4137931 0.53571429]
|
|
|
|
mean value: 0.4631773399014778
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00953317 0.01132226 0.00892282 0.00931168 0.00937891 0.00864029
|
|
0.00916529 0.00923681 0.00909519 0.00845647]
|
|
|
|
mean value: 0.00930628776550293
|
|
|
|
key: score_time
|
|
value: [0.04363441 0.00601792 0.01144981 0.01010704 0.00944614 0.00927305
|
|
0.00956726 0.01000333 0.014678 0.00946236]
|
|
|
|
mean value: 0.013363933563232422
|
|
|
|
key: test_mcc
|
|
value: [ 0.48795004 nan 0.46666667 0.48795004 0.46666667 0.25819889
|
|
0.48795004 0. -0.4472136 0.64549722]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.39440661 0.50503962 0.35808137 0.42968701 0.50503962 0.51530373
|
|
0.4660252 0.49787306 0.56963094 0.52098273]
|
|
|
|
mean value: 0.47620698672135525
|
|
|
|
key: test_accuracy
|
|
value: [0.75 nan 0.75 0.75 0.75 0.625
|
|
0.75 0.625 0.375 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.73239437 0.77464789 0.71830986 0.74647887 0.77464789 0.77464789
|
|
0.76056338 0.77464789 0.8028169 0.77777778]
|
|
|
|
mean value: 0.7636932707355243
|
|
|
|
key: test_fscore
|
|
value: [0.5 nan 0.66666667 0.5 0.66666667 0.57142857
|
|
0.5 0. 0. 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.53658537 0.6 0.5 0.57142857 0.6 0.57894737
|
|
0.58536585 0.63636364 0.66666667 0.61904762]
|
|
|
|
mean value: 0.5894405081439741
|
|
|
|
key: test_precision
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[1. nan 0.66666667 1. 0.66666667 0.5
|
|
1. 0. 0. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.73333333 0.85714286 0.71428571 0.75 0.85714286 0.91666667
|
|
0.8 0.77777778 0.875 0.86666667]
|
|
|
|
mean value: 0.8148015873015872
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.66666667 0.33333333 0.66666667 0.66666667
|
|
0.33333333 0. 0. 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.42307692 0.46153846 0.38461538 0.46153846 0.46153846 0.42307692
|
|
0.46153846 0.53846154 0.53846154 0.48148148]
|
|
|
|
mean value: 0.4635327635327635
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 nan 0.73333333 0.66666667 0.73333333 0.63333333
|
|
0.66666667 0.5 0.3 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.66709402 0.70854701 0.64786325 0.68632479 0.70854701 0.70042735
|
|
0.6974359 0.72478632 0.74700855 0.71851852]
|
|
|
|
mean value: 0.7006552706552707
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 nan 0.5 0.33333333 0.5 0.4
|
|
0.33333333 0. 0. 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.36666667 0.42857143 0.33333333 0.4 0.42857143 0.40740741
|
|
0.4137931 0.46666667 0.5 0.44827586]
|
|
|
|
mean value: 0.41932858967341724
|
|
|
|
MCC on Blind test: 0.06
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00903344 0.0091846 0.00961351 0.00917578 0.0096128 0.00965309
|
|
0.00942969 0.0092864 0.00940824 0.00987148]
|
|
|
|
mean value: 0.00942690372467041
|
|
|
|
key: score_time
|
|
value: [0.00883532 0.00442553 0.00902295 0.00915003 0.00871062 0.00882101
|
|
0.00899625 0.00945115 0.00905657 0.00935435]
|
|
|
|
mean value: 0.008582377433776855
|
|
|
|
key: test_mcc
|
|
value: [0.48795004 nan 0.48795004 0.48795004 0.48795004 0.74535599
|
|
0. 0. 0.48795004 0.3 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.68088097 0.6228411 0.56330071 0.59331346 0.56330071 0.6228411
|
|
0.56330071 0.68088097 0.6228411 0.66226618]
|
|
|
|
mean value: 0.6175767019079942
|
|
|
|
key: test_accuracy
|
|
value: [0.75 nan 0.75 0.75 0.75 0.875
|
|
0.625 0.625 0.75 0.71428571]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.84507042 0.81690141 0.78873239 0.8028169 0.78873239 0.81690141
|
|
0.78873239 0.84507042 0.81690141 0.83333333]
|
|
|
|
mean value: 0.814319248826291
|
|
|
|
key: test_fscore
|
|
value: [0.5 nan 0.5 0.5 0.5 0.8 0. 0. 0.5 0.5]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.73170732 0.66666667 0.59459459 0.63157895 0.59459459 0.66666667
|
|
0.59459459 0.73170732 0.66666667 0.71428571]
|
|
|
|
mean value: 0.659306307958426
|
|
|
|
key: test_precision
|
|
value: [1. nan 1. 1. 1. 1. 0. 0. 1. 0.5]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.33333333 0.33333333 0.33333333 0.66666667
|
|
0. 0. 0.33333333 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.57692308 0.5 0.42307692 0.46153846 0.42307692 0.5
|
|
0.42307692 0.57692308 0.5 0.55555556]
|
|
|
|
mean value: 0.49401709401709404
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 nan 0.66666667 0.66666667 0.66666667 0.83333333
|
|
0.5 0.5 0.66666667 0.65 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.78846154 0.75 0.71153846 0.73076923 0.71153846 0.75
|
|
0.71153846 0.78846154 0.75 0.77777778]
|
|
|
|
mean value: 0.747008547008547
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 nan 0.33333333 0.33333333 0.33333333 0.66666667
|
|
0. 0. 0.33333333 0.33333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.57692308 0.5 0.42307692 0.46153846 0.42307692 0.5
|
|
0.42307692 0.57692308 0.5 0.55555556]
|
|
|
|
mean value: 0.49401709401709404
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.36889744 0.38598275 0.39493704 0.38300252 0.44200897 0.40054107
|
|
0.43577385 0.40177441 0.4005568 0.47488117]
|
|
|
|
mean value: 0.40883560180664064
|
|
|
|
key: score_time
|
|
value: [0.01228499 0.00684047 0.01225257 0.0120151 0.0121727 0.0118804
|
|
0.01362801 0.01195502 0.01198483 0.01258683]
|
|
|
|
mean value: 0.011760091781616211
|
|
|
|
key: test_mcc
|
|
value: [ 0.48795004 nan 0.46666667 0.25819889 0.46666667 0.6
|
|
0.77459667 -0.29277002 0.46666667 -0.09128709]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.75 nan 0.75 0.625 0.75 0.75
|
|
0.875 0.5 0.75 0.42857143]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.5 nan 0.66666667 0.57142857 0.66666667 0.75
|
|
0.85714286 0. 0.66666667 0.33333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. nan 0.66666667 0.5 0.66666667 0.6
|
|
0.75 0. 0.66666667 0.25 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.66666667 0.66666667 0.66666667 1.
|
|
1. 0. 0.66666667 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 nan 0.73333333 0.63333333 0.73333333 0.8
|
|
0.9 0.4 0.73333333 0.45 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 nan 0.5 0.4 0.5 0.6
|
|
0.75 0. 0.5 0.2 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01571584 0.01269054 0.01136661 0.01086378 0.00994062 0.00986242
|
|
0.01026821 0.01012969 0.00951695 0.01019168]
|
|
|
|
mean value: 0.011054635047912598
|
|
|
|
key: score_time
|
|
value: [0.01275587 0.00504398 0.00905156 0.00882149 0.00875378 0.00857162
|
|
0.00925541 0.00867009 0.00854111 0.00858116]
|
|
|
|
mean value: 0.008804607391357421
|
|
|
|
key: test_mcc
|
|
value: [0.25819889 nan 0.77459667 0.77459667 1. 0.6
|
|
0.74535599 0.74535599 0.74535599 0.54772256]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.625 nan 0.875 0.875 1. 0.75
|
|
0.875 0.875 0.875 0.71428571]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 nan 0.85714286 0.85714286 1. 0.75
|
|
0.8 0.8 0.8 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5 nan 0.75 0.75 1. 0.6 1. 1. 1. 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 nan 1. 1. 1. 1.
|
|
0.66666667 0.66666667 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.63333333 nan 0.9 0.9 1. 0.8
|
|
0.83333333 0.83333333 0.83333333 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.4 nan 0.75 0.75 1. 0.6
|
|
0.66666667 0.66666667 0.66666667 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.89
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08180642 0.08224797 0.08108139 0.08005643 0.08400655 0.08195567
|
|
0.083637 0.08279467 0.08375001 0.08794379]
|
|
|
|
mean value: 0.0829279899597168
|
|
|
|
key: score_time
|
|
value: [0.01682615 0.00449514 0.01753378 0.01695251 0.01697278 0.01840544
|
|
0.01810956 0.01845503 0.01751947 0.01944685]
|
|
|
|
mean value: 0.016471672058105468
|
|
|
|
key: test_mcc
|
|
value: [ 0.48795004 nan 0.74535599 -0.06666667 0.48795004 0.6
|
|
0.74535599 -0.29277002 0.1490712 0.3 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.75 nan 0.875 0.5 0.75 0.75
|
|
0.875 0.5 0.625 0.71428571]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.5 nan 0.8 0.33333333 0.5 0.75
|
|
0.8 0. 0.4 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. nan 1. 0.33333333 1. 0.6
|
|
1. 0. 0.5 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.66666667 0.33333333 0.33333333 1.
|
|
0.66666667 0. 0.33333333 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 nan 0.83333333 0.46666667 0.66666667 0.8
|
|
0.83333333 0.4 0.56666667 0.65 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 nan 0.66666667 0.2 0.33333333 0.6
|
|
0.66666667 0. 0.25 0.33333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00917554 0.00918388 0.01030898 0.00913 0.00861025 0.00867605
|
|
0.00893021 0.00875688 0.00861931 0.00882649]
|
|
|
|
mean value: 0.009021759033203125
|
|
|
|
key: score_time
|
|
value: [0.00911689 0.00463772 0.00918674 0.00968862 0.00868392 0.00932837
|
|
0.00896454 0.00857091 0.00860333 0.00897813]
|
|
|
|
mean value: 0.008575916290283203
|
|
|
|
key: test_mcc
|
|
value: [ 0.1490712 nan 0.46666667 -0.06666667 0.1490712 1.
|
|
0.77459667 -0.4472136 0.46666667 -0.25819889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.625 nan 0.75 0.5 0.625 1.
|
|
0.875 0.375 0.75 0.57142857]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.4 nan 0.66666667 0.33333333 0.4 1.
|
|
0.85714286 0. 0.66666667 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5 nan 0.66666667 0.33333333 0.5 1.
|
|
0.75 0. 0.66666667 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.66666667 0.33333333 0.33333333 1.
|
|
1. 0. 0.66666667 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.56666667 nan 0.73333333 0.46666667 0.56666667 1.
|
|
0.9 0.3 0.73333333 0.4 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.25 nan 0.5 0.2 0.25 1. 0.75 0. 0.5 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.00864148 1.00945377 1.01145029 1.02092719 1.02262855 1.02108669
|
|
1.03264499 1.03610301 0.99609876 0.99602723]
|
|
|
|
mean value: 1.0155061960220337
|
|
|
|
key: score_time
|
|
value: [0.09197426 0.00447416 0.09149528 0.14959788 0.09277177 0.09351969
|
|
0.09247947 0.09435916 0.08660769 0.09346843]
|
|
|
|
mean value: 0.08907477855682373
|
|
|
|
key: test_mcc
|
|
value: [ 0.48795004 nan 0.74535599 0.25819889 0.48795004 0.77459667
|
|
0.74535599 -0.29277002 0.48795004 0.73029674]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.75 nan 0.875 0.625 0.75 0.875
|
|
0.875 0.5 0.75 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.5 nan 0.8 0.57142857 0.5 0.85714286
|
|
0.8 0. 0.5 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. nan 1. 0.5 1. 0.75
|
|
1. 0. 1. 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.66666667 0.66666667 0.33333333 1.
|
|
0.66666667 0. 0.33333333 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 nan 0.83333333 0.63333333 0.66666667 0.9
|
|
0.83333333 0.4 0.66666667 0.9 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 nan 0.66666667 0.4 0.33333333 0.75
|
|
0.66666667 0. 0.33333333 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.52
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
|
|
key: fit_time
|
|
value: [1.67271042 0.90515995 0.86359429 0.90234375 0.96636534 0.87128806
|
|
0.8657763 0.87282944 0.88145161 0.83073807]
|
|
|
|
mean value: 0.9632257223129272
|
|
|
|
key: score_time
|
|
value: [0.21594787 0.00481105 0.17994237 0.17904091 0.21373725 0.20884776
|
|
0.17252564 0.20995092 0.20395184 0.22969842]
|
|
|
|
mean value: 0.18184540271759034
|
|
|
|
key: test_mcc
|
|
value: [0.48795004 nan 0.46666667 0.48795004 0.48795004 0.74535599
|
|
0.74535599 0.48795004 0.48795004 0.73029674]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.88152145 0.91067388 0.94010481 0.96986363 0.94010481 0.96986363
|
|
0.94010481 0.94010481 0.94010481 0.97058178]
|
|
|
|
mean value: 0.9403028409607979
|
|
|
|
key: test_accuracy
|
|
value: [0.75 nan 0.75 0.75 0.75 0.875
|
|
0.875 0.75 0.75 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.94366197 0.95774648 0.97183099 0.98591549 0.97183099 0.98591549
|
|
0.97183099 0.97183099 0.97183099 0.98611111]
|
|
|
|
mean value: 0.9718505477308295
|
|
|
|
key: test_fscore
|
|
value: [0.5 nan 0.66666667 0.5 0.5 0.8
|
|
0.8 0.5 0.5 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.91666667 0.93877551 0.96 0.98039216 0.96 0.98039216
|
|
0.96 0.96 0.96 0.98113208]
|
|
|
|
mean value: 0.9597358566067937
|
|
|
|
key: test_precision
|
|
value: [1. nan 0.66666667 1. 1. 1.
|
|
1. 1. 1. 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.66666667 0.33333333 0.33333333 0.66666667
|
|
0.66666667 0.33333333 0.33333333 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.84615385 0.88461538 0.92307692 0.96153846 0.92307692 0.96153846
|
|
0.92307692 0.92307692 0.92307692 0.96296296]
|
|
|
|
mean value: 0.9232193732193732
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 nan 0.73333333 0.66666667 0.66666667 0.83333333
|
|
0.83333333 0.66666667 0.66666667 0.9 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.92307692 0.94230769 0.96153846 0.98076923 0.96153846 0.98076923
|
|
0.96153846 0.96153846 0.96153846 0.98148148]
|
|
|
|
mean value: 0.9616096866096866
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 nan 0.5 0.33333333 0.33333333 0.66666667
|
|
0.66666667 0.33333333 0.33333333 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.84615385 0.88461538 0.92307692 0.96153846 0.92307692 0.96153846
|
|
0.92307692 0.92307692 0.92307692 0.96296296]
|
|
|
|
mean value: 0.9232193732193732
|
|
|
|
MCC on Blind test: 0.63
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00874162 0.00872374 0.00922036 0.00902081 0.00892901 0.00882077
|
|
0.0089941 0.00903893 0.00934553 0.00889754]
|
|
|
|
mean value: 0.008973240852355957
|
|
|
|
key: score_time
|
|
value: [0.00875211 0.00437403 0.00965929 0.00880575 0.00879216 0.00873375
|
|
0.00872302 0.00874352 0.00864363 0.0090158 ]
|
|
|
|
mean value: 0.00842430591583252
|
|
|
|
key: test_mcc
|
|
value: [-0.29277002 nan 0.1490712 0. 0.46666667 0.25819889
|
|
-0.29277002 -0.29277002 0.1490712 -0.25819889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.50503962 0.50503962 0.56963094 0.56963094 0.4660252 0.532629
|
|
0.53764379 0.53764379 0.4660252 0.621059 ]
|
|
|
|
mean value: 0.5310367097131722
|
|
|
|
key: test_accuracy
|
|
value: [0.5 nan 0.625 0.625 0.75 0.625
|
|
0.5 0.5 0.625 0.57142857]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.77464789 0.77464789 0.8028169 0.8028169 0.76056338 0.78873239
|
|
0.78873239 0.78873239 0.76056338 0.81944444]
|
|
|
|
mean value: 0.7861697965571205
|
|
|
|
key: test_fscore
|
|
value: [0. nan 0.4 0. 0.66666667 0.57142857
|
|
0. 0. 0.4 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.6 0.6 0.66666667 0.66666667 0.58536585 0.65116279
|
|
0.63414634 0.63414634 0.58536585 0.69767442]
|
|
|
|
mean value: 0.6321194932879561
|
|
|
|
key: test_precision
|
|
value: [0. nan 0.5 0. 0.66666667 0.5
|
|
0. 0. 0.5 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.85714286 0.85714286 0.875 0.875 0.8 0.82352941
|
|
0.86666667 0.86666667 0.8 0.9375 ]
|
|
|
|
mean value: 0.8558648459383753
|
|
|
|
key: test_recall
|
|
value: [0. nan 0.33333333 0. 0.66666667 0.66666667
|
|
0. 0. 0.33333333 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.46153846 0.46153846 0.53846154 0.53846154 0.46153846 0.53846154
|
|
0.5 0.5 0.46153846 0.55555556]
|
|
|
|
mean value: 0.5017094017094017
|
|
|
|
key: test_roc_auc
|
|
value: [0.4 nan 0.56666667 0.5 0.73333333 0.63333333
|
|
0.4 0.4 0.56666667 0.4 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.70854701 0.70854701 0.74700855 0.74700855 0.6974359 0.73589744
|
|
0.72777778 0.72777778 0.6974359 0.76666667]
|
|
|
|
mean value: 0.7264102564102564
|
|
|
|
key: test_jcc
|
|
value: [0. nan 0.25 0. 0.5 0.4 0. 0. 0.25 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
|
|
value: [0.42857143 0.42857143 0.5 0.5 0.4137931 0.48275862
|
|
0.46428571 0.46428571 0.4137931 0.53571429]
|
|
|
|
mean value: 0.4631773399014778
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.08834314 0.03863764 0.04068565 0.0569241 0.07989025 0.09637451
|
|
0.07425714 0.03369784 0.03540182 0.0447278 ]
|
|
|
|
mean value: 0.058893990516662595
|
|
|
|
key: score_time
|
|
value: [0.01159501 0.00512886 0.0114882 0.01043797 0.01063323 0.01238704
|
|
0.01091051 0.01177049 0.01157951 0.01068592]
|
|
|
|
mean value: 0.010661673545837403
|
|
|
|
key: test_mcc
|
|
value: [0.74535599 nan 0.74535599 1. 1. 0.6
|
|
0.74535599 1. 0.74535599 0.73029674]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 nan 0.875 1. 1. 0.75
|
|
0.875 1. 0.875 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 nan 0.8 1. 1. 0.75 0.8 1. 0.8 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. nan 1. 1. 1. 0.6
|
|
1. 1. 1. 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 nan 0.66666667 1. 1. 1.
|
|
0.66666667 1. 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 nan 0.83333333 1. 1. 0.8
|
|
0.83333333 1. 0.83333333 0.9 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 nan 0.66666667 1. 1. 0.6
|
|
0.66666667 1. 0.66666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.89
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02861381 0.01710796 0.01704741 0.02752972 0.01703906 0.0173142
|
|
0.0171032 0.01710773 0.01708961 0.01672173]
|
|
|
|
mean value: 0.01926743984222412
|
|
|
|
key: score_time
|
|
value: [0.01201081 0.00585961 0.01186156 0.0116775 0.01184201 0.01153588
|
|
0.01147604 0.01150894 0.01155281 0.01160359]
|
|
|
|
mean value: 0.011092877388000489
|
|
|
|
key: test_mcc
|
|
value: [ 0.77459667 nan 0.77459667 0.25819889 0.1490712 0.25819889
|
|
0.74535599 -0.06666667 0.06666667 0.3 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.96986363 0.96986363 1. 0.96986363 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9909590875629278
|
|
|
|
key: test_accuracy
|
|
value: [0.875 nan 0.875 0.625 0.625 0.625
|
|
0.875 0.5 0.5 0.71428571]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.98591549 0.98591549 1. 0.98591549 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.995774647887324
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 nan 0.85714286 0.57142857 0.4 0.57142857
|
|
0.8 0.33333333 0.5 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.98039216 0.98039216 1. 0.98039216 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9941176470588236
|
|
|
|
key: test_precision
|
|
value: [0.75 nan 0.75 0.5 0.5 0.5
|
|
1. 0.33333333 0.4 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. nan 1. 0.66666667 0.33333333 0.66666667
|
|
0.66666667 0.33333333 0.66666667 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.96153846 0.96153846 1. 0.96153846 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9884615384615385
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 nan 0.9 0.63333333 0.56666667 0.63333333
|
|
0.83333333 0.46666667 0.53333333 0.65 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.98076923 0.98076923 1. 0.98076923 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9942307692307693
|
|
|
|
key: test_jcc
|
|
value: [0.75 nan 0.75 0.4 0.25 0.4
|
|
0.66666667 0.2 0.33333333 0.33333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.96153846 0.96153846 1. 0.96153846 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9884615384615385
|
|
|
|
MCC on Blind test: -0.1
|
|
|
|
Accuracy on Blind test: 0.48
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01906657 0.00880098 0.0084815 0.00850773 0.00844955 0.0085187
|
|
0.00853658 0.00868392 0.00857282 0.00848532]
|
|
|
|
mean value: 0.009610366821289063
|
|
|
|
key: score_time
|
|
value: [0.00889063 0.00434232 0.00853539 0.00835586 0.00840497 0.00841403
|
|
0.00842023 0.00843906 0.00841045 0.00846195]
|
|
|
|
mean value: 0.008067488670349121
|
|
|
|
key: test_mcc
|
|
value: [-0.06666667 nan 0.74535599 0.1490712 0.74535599 0.48795004
|
|
0.46666667 0.1490712 0.48795004 0.3 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.5638813 0.56963094 0.59712276 0.66078849 0.59712276 0.59831714
|
|
0.62887412 0.56542032 0.56928569 0.57594697]
|
|
|
|
mean value: 0.5926390492106871
|
|
|
|
key: test_accuracy
|
|
value: [0.5 nan 0.875 0.625 0.875 0.75
|
|
0.75 0.625 0.75 0.71428571]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.8028169 0.8028169 0.81690141 0.84507042 0.81690141 0.81690141
|
|
0.83098592 0.8028169 0.8028169 0.80555556]
|
|
|
|
mean value: 0.8143583724569641
|
|
|
|
key: test_fscore
|
|
value: [0.33333333 nan 0.8 0.4 0.8 0.5
|
|
0.66666667 0.4 0.5 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.69565217 0.66666667 0.71111111 0.7755102 0.71111111 0.73469388
|
|
0.73913043 0.70833333 0.72 0.70833333]
|
|
|
|
mean value: 0.7170542245883861
|
|
|
|
key: test_precision
|
|
value: [0.33333333 nan 1. 0.5 1. 1.
|
|
0.66666667 0.5 1. 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.8 0.875 0.84210526 0.82608696 0.84210526 0.7826087
|
|
0.85 0.77272727 0.75 0.80952381]
|
|
|
|
mean value: 0.8150157260740785
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.66666667 0.33333333 0.66666667 0.33333333
|
|
0.66666667 0.33333333 0.33333333 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.61538462 0.53846154 0.61538462 0.73076923 0.61538462 0.69230769
|
|
0.65384615 0.65384615 0.69230769 0.62962963]
|
|
|
|
mean value: 0.6437321937321937
|
|
|
|
key: test_roc_auc
|
|
value: [0.46666667 nan 0.83333333 0.56666667 0.83333333 0.66666667
|
|
0.73333333 0.56666667 0.66666667 0.65 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.76324786 0.74700855 0.77435897 0.82094017 0.77435897 0.79059829
|
|
0.79358974 0.77136752 0.77948718 0.77037037]
|
|
|
|
mean value: 0.7785327635327636
|
|
|
|
key: test_jcc
|
|
value: [0.2 nan 0.66666667 0.25 0.66666667 0.33333333
|
|
0.5 0.25 0.33333333 0.33333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.53333333 0.5 0.55172414 0.63333333 0.55172414 0.58064516
|
|
0.5862069 0.5483871 0.5625 0.5483871 ]
|
|
|
|
mean value: 0.5596241193919169
|
|
|
|
MCC on Blind test: 0.03
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00974989 0.01256609 0.01255202 0.01197124 0.01251864 0.01321697
|
|
0.01367021 0.012501 0.01297402 0.01391482]
|
|
|
|
mean value: 0.012563490867614746
|
|
|
|
key: score_time
|
|
value: [0.00909543 0.00559807 0.01082706 0.01195002 0.01125741 0.01126671
|
|
0.01122785 0.0112493 0.01124573 0.01133037]
|
|
|
|
mean value: 0.010504794120788575
|
|
|
|
key: test_mcc
|
|
value: [ 0.48795004 nan 0.46666667 0.25819889 0.46666667 0.29277002
|
|
0.77459667 -0.29277002 0.46666667 0.54772256]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.91067388 0.91067388 0.94196687 0.90865445 0.91067388 0.88861107
|
|
0.9703421 0.90865445 0.94196687 0.97100831]
|
|
|
|
mean value: 0.9263225781397904
|
|
|
|
key: test_accuracy
|
|
value: [0.75 nan 0.75 0.625 0.75 0.5
|
|
0.875 0.5 0.75 0.71428571]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.95774648 0.95774648 0.97183099 0.95774648 0.95774648 0.94366197
|
|
0.98591549 0.95774648 0.97183099 0.98611111]
|
|
|
|
mean value: 0.9648082942097026
|
|
|
|
key: test_fscore
|
|
value: [0.5 nan 0.66666667 0.57142857 0.66666667 0.6
|
|
0.85714286 0. 0.66666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.93877551 0.93877551 0.96296296 0.94117647 0.93877551 0.92857143
|
|
0.98113208 0.94117647 0.96296296 0.98181818]
|
|
|
|
mean value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
0.9516127083575949
|
|
|
|
key: test_precision
|
|
value: [1. nan 0.66666667 0.5 0.66666667 0.42857143
|
|
0.75 0. 0.66666667 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.92857143 0.96 1. 0.86666667
|
|
0.96296296 0.96 0.92857143 0.96428571]
|
|
|
|
mean value: 0.9571058201058201
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.66666667 0.66666667 0.66666667 1.
|
|
1. 0. 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.88461538 0.88461538 1. 0.92307692 0.88461538 1.
|
|
1. 0.92307692 1. 1. ]
|
|
|
|
mean value: 0.95
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 nan 0.73333333 0.63333333 0.73333333 0.6
|
|
0.9 0.4 0.73333333 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.94230769 0.94230769 0.97777778 0.95042735 0.94230769 0.95555556
|
|
0.98888889 0.95042735 0.97777778 0.98888889]
|
|
|
|
mean value: 0.9616666666666667
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 nan 0.5 0.4 0.5 0.42857143
|
|
0.75 0. 0.5 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.88461538 0.88461538 0.92857143 0.88888889 0.88461538 0.86666667
|
|
0.96296296 0.88888889 0.92857143 0.96428571]
|
|
|
|
mean value: 0.9082682132682133
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01293874 0.01226783 0.01234913 0.01200628 0.01229644 0.01227403
|
|
0.0121088 0.01211166 0.01241088 0.01239038]
|
|
|
|
mean value: 0.01231541633605957
|
|
|
|
key: score_time
|
|
value: [0.00994158 0.00611711 0.01135921 0.01127291 0.01130056 0.01134491
|
|
0.01127958 0.01128697 0.01127529 0.01119423]
|
|
|
|
mean value: 0.010637235641479493
|
|
|
|
key: test_mcc
|
|
value: [ 0.48795004 nan 0.06666667 0.46666667 0.46666667 0.77459667
|
|
0.74535599 -0.29277002 0.25819889 0.54772256]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 0.9703421 0.86343483 0.81649844 0.79294273 0.82385255
|
|
0.70957488 0.78854022 0.79294273 1. ]
|
|
|
|
mean value: 0.8558128479158662
|
|
|
|
key: test_accuracy
|
|
value: [0.75 nan 0.5 0.75 0.75 0.875
|
|
0.875 0.5 0.625 0.71428571]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.98591549 0.92957746 0.91549296 0.88732394 0.91549296
|
|
0.85915493 0.90140845 0.88732394 1. ]
|
|
|
|
mean value: 0.928169014084507
|
|
|
|
key: test_fscore
|
|
value: [0.5 nan 0.5 0.66666667 0.66666667 0.85714286
|
|
0.8 0. 0.57142857 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 0.98113208 0.9122807 0.88 0.86666667 0.86956522
|
|
0.76190476 0.85106383 0.86666667 1. ]
|
|
|
|
mean value: 0.8989279919642718
|
|
|
|
key: test_precision
|
|
value: [1. nan 0.4 0.66666667 0.66666667 0.75
|
|
1. 0. 0.5 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 0.96296296 0.83870968 0.91666667 0.76470588 1.
|
|
1. 0.95238095 0.76470588 1. ]
|
|
|
|
mean value: 0.9200132024135819
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.66666667 0.66666667 0.66666667 1.
|
|
0.66666667 0. 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 0.84615385 1. 0.76923077
|
|
0.61538462 0.76923077 1. 1. ]
|
|
|
|
mean value: 0.9
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 nan 0.53333333 0.73333333 0.73333333 0.9
|
|
0.83333333 0.4 0.63333333 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.98888889 0.94444444 0.9008547 0.91111111 0.88461538
|
|
0.80769231 0.87350427 0.91111111 1. ]
|
|
|
|
mean value: 0.9222222222222222
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 nan 0.33333333 0.5 0.5 0.75
|
|
0.66666667 0. 0.4 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 0.96296296 0.83870968 0.78571429 0.76470588 0.76923077
|
|
0.61538462 0.74074074 0.76470588 1. ]
|
|
|
|
mean value: 0.8242154816158611
|
|
|
|
MCC on Blind test: -0.11
|
|
|
|
Accuracy on Blind test: 0.42
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08456826 0.07948041 0.08607769 0.07694125 0.076864 0.07677102
|
|
0.07680297 0.08007669 0.07709074 0.07826424]
|
|
|
|
mean value: 0.07929372787475586
|
|
|
|
key: score_time
|
|
value: [0.01530957 0.00458312 0.01483393 0.01464057 0.01444936 0.01463532
|
|
0.01460624 0.01483655 0.01560974 0.01539278]
|
|
|
|
mean value: 0.013889718055725097
|
|
|
|
key: test_mcc
|
|
value: [0.74535599 nan 0.25819889 1. 1. 0.6
|
|
1. 1. 0.74535599 0.54772256]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 nan 0.625 1. 1. 0.75
|
|
1. 1. 0.875 0.71428571]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 nan 0.57142857 1. 1. 0.75
|
|
1. 1. 0.8 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. nan 0.5 1. 1. 0.6 1. 1. 1. 0.5]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 nan 0.66666667 1. 1. 1.
|
|
1. 1. 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 nan 0.63333333 1. 1. 0.8
|
|
1. 1. 0.83333333 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 nan 0.4 1. 1. 0.6
|
|
1. 1. 0.66666667 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.72
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03413796 0.02626228 0.03821015 0.04624867 0.04347873 0.03817987
|
|
0.04379535 0.03776097 0.0358882 0.04368043]
|
|
|
|
mean value: 0.03876426219940186
|
|
|
|
key: score_time
|
|
value: [0.01770973 0.00455666 0.03768253 0.02266836 0.02085519 0.02388167
|
|
0.03207541 0.02712274 0.03294992 0.02003694]
|
|
|
|
mean value: 0.023953914642333984
|
|
|
|
key: test_mcc
|
|
value: [0.74535599 nan 0.74535599 1. 1. 0.6
|
|
0.74535599 1. 0.74535599 0.73029674]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.96986363 0.96986363 1. 0.96986363 0.94010481 1.
|
|
1. 1. 0.96986363 1. ]
|
|
|
|
mean value: 0.9819559310958537
|
|
|
|
key: test_accuracy
|
|
value: [0.875 nan 0.875 1. 1. 0.75
|
|
0.875 1. 0.875 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.98591549 0.98591549 1. 0.98591549 0.97183099 1.
|
|
1. 1. 0.98591549 1. ]
|
|
|
|
mean value: 0.9915492957746479
|
|
|
|
key: test_fscore
|
|
value: [0.8 nan 0.8 1. 1. 0.75 0.8 1. 0.8 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.98039216 0.98039216 1. 0.98039216 0.96 1.
|
|
1. 1. 0.98039216 1. ]
|
|
|
|
mean value: 0.9881568627450981
|
|
|
|
key: test_precision
|
|
value: [1. nan 1. 1. 1. 0.6
|
|
1. 1. 1. 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 nan 0.66666667 1. 1. 1.
|
|
0.66666667 1. 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.96153846 0.96153846 1. 0.96153846 0.92307692 1.
|
|
1. 1. 0.96153846 1. ]
|
|
|
|
mean value: 0.9769230769230769
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 nan 0.83333333 1. 1. 0.8
|
|
0.83333333 1. 0.83333333 0.9 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.98076923 0.98076923 1. 0.98076923 0.96153846 1.
|
|
1. 1. 0.98076923 1. ]
|
|
|
|
mean value: 0.9884615384615385
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 nan 0.66666667 1. 1. 0.6
|
|
0.66666667 1. 0.66666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.96153846 0.96153846 1. 0.96153846 0.92307692 1.
|
|
1. 1. 0.96153846 1. ]
|
|
|
|
mean value: 0.9769230769230769
|
|
|
|
MCC on Blind test: 0.95
|
|
|
|
Accuracy on Blind test: 0.98
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01672459 0.01487875 0.01532412 0.01514101 0.01528788 0.01714444
|
|
0.015203 0.01521587 0.01526046 0.02377939]
|
|
|
|
mean value: 0.016395950317382814
|
|
|
|
key: score_time
|
|
value: [0.01178098 0.00585222 0.01161599 0.01168609 0.01185441 0.01175952
|
|
0.01163292 0.01173878 0.01166463 0.01176715]
|
|
|
|
mean value: 0.011135268211364745
|
|
|
|
key: test_mcc
|
|
value: [ 0.1490712 nan 0.74535599 -0.4472136 0.48795004 0.77459667
|
|
0.1490712 -0.29277002 0.48795004 0.73029674]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.625 nan 0.875 0.375 0.75 0.875
|
|
0.625 0.5 0.75 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.4 nan 0.8 0. 0.5 0.85714286
|
|
0.4 0. 0.5 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5 nan 1. 0. 1. 0.75
|
|
0.5 0. 1. 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0.66666667 0. 0.33333333 1.
|
|
0.33333333 0. 0.33333333 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.56666667 nan 0.83333333 0.3 0.66666667 0.9
|
|
0.56666667 0.4 0.66666667 0.9 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.25 nan 0.66666667 0. 0.33333333 0.75
|
|
0.25 0. 0.33333333 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.18
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.16644502 0.15082502 0.17178988 0.15492988 0.15712714 0.15647697
|
|
0.15715313 0.15811729 0.12830138 0.17333484]
|
|
|
|
mean value: 0.1574500560760498
|
|
|
|
key: score_time
|
|
value: [0.00920653 0.00468278 0.00959659 0.00929022 0.009588 0.0092895
|
|
0.00926042 0.00923944 0.00946832 0.00917864]
|
|
|
|
mean value: 0.008880043029785156
|
|
|
|
key: test_mcc
|
|
value: [0.74535599 nan 0.46666667 0.77459667 1. 0.6
|
|
0.74535599 1. 0.74535599 0.73029674]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 nan 0.75 0.875 1. 0.75
|
|
0.875 1. 0.875 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 nan 0.66666667 0.85714286 1. 0.75
|
|
0.8 1. 0.8 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. nan 0.66666667 0.75 1. 0.6
|
|
1. 1. 1. 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 nan 0.66666667 1. 1. 1.
|
|
0.66666667 1. 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 nan 0.73333333 0.9 1. 0.8
|
|
0.83333333 1. 0.83333333 0.9 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 nan 0.5 0.75 1. 0.6
|
|
0.66666667 1. 0.66666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.89
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01092577 0.01285172 0.01395202 0.01353192 0.01414061 0.01355553
|
|
0.01369452 0.01398444 0.01363087 0.01456404]
|
|
|
|
mean value: 0.013483142852783203
|
|
|
|
key: score_time
|
|
value: [0.01237583 0.00615168 0.01228261 0.01294899 0.01175308 0.01328015
|
|
0.01323271 0.01173949 0.01321077 0.01327252]
|
|
|
|
mean value: 0.012024784088134765
|
|
|
|
key: test_mcc
|
|
value: [-0.46666667 nan -0.29277002 -0.29277002 -0.25819889 0.1490712
|
|
-0.46666667 -0.29277002 -0.4472136 -0.25819889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.25 nan 0.5 0.5 0.375 0.625
|
|
0.25 0.5 0.375 0.57142857]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.25 nan 0. 0. 0.28571429 0.4
|
|
0.25 0. 0. 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.2 nan 0. 0. 0.25 0.5 0.2 0. 0. 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 nan 0. 0. 0.33333333 0.33333333
|
|
0.33333333 0. 0. 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.26666667 nan 0.4 0.4 0.36666667 0.56666667
|
|
0.26666667 0.4 0.3 0.4 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.14285714 nan 0. 0. 0.16666667 0.25
|
|
0.14285714 0. 0. 0. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.14
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02124691 0.01254821 0.01597905 0.03487706 0.01338506 0.01416159
|
|
0.01252818 0.01272392 0.03272414 0.03284836]
|
|
|
|
mean value: 0.020302248001098634
|
|
|
|
key: score_time
|
|
value: [0.01174521 0.00608397 0.01180792 0.01995564 0.01202583 0.01157284
|
|
0.01147699 0.01158595 0.02148509 0.02132988]
|
|
|
|
mean value: 0.01390693187713623
|
|
|
|
key: test_mcc
|
|
value: [0.74535599 nan 0.46666667 0.6 0.74535599 0.6
|
|
0.77459667 0.1490712 0.46666667 0.54772256]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 0.9703421 0.9703421 0.9703421 0.9703421 1.
|
|
0.9703421 0.9703421 1. ]
|
|
|
|
mean value: 0.9822052584466778
|
|
|
|
key: test_accuracy
|
|
value: [0.875 nan 0.75 0.75 0.875 0.75
|
|
0.875 0.625 0.75 0.71428571]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 0.98591549 0.98591549 0.98591549 0.98591549
|
|
1. 0.98591549 0.98591549 1. ]
|
|
|
|
mean value: 0.9915492957746479
|
|
|
|
key: test_fscore
|
|
value: [0.8 nan 0.66666667 0.75 0.8 0.75
|
|
0.85714286 0.4 0.66666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 0.98113208 0.98113208 0.98113208 0.98113208
|
|
1. 0.98113208 0.98113208 1. ]
|
|
|
|
mean value: 0.9886792452830189
|
|
|
|
key: test_precision
|
|
value: [1. nan 0.66666667 0.6 1. 0.6
|
|
0.75 0.5 0.66666667 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.96296296 0.96296296 0.96296296 0.96296296
|
|
1. 0.96296296 0.96296296 1. ]
|
|
|
|
mean value: 0.9777777777777777
|
|
|
|
key: test_recall
|
|
value: [0.66666667 nan 0.66666667 1. 0.66666667 1.
|
|
1. 0.33333333 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 nan 0.73333333 0.8 0.83333333 0.8
|
|
0.9 0.56666667 0.73333333 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:107: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:110: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[1. 1. 0.98888889 0.98888889 0.98888889 0.98888889
|
|
1. 0.98888889 0.98888889 1. ]
|
|
|
|
mean value: 0.9933333333333333
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 nan 0.5 0.6 0.66666667 0.6
|
|
0.75 0.25 0.5 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 0.96296296 0.96296296 0.96296296 0.96296296
|
|
1. 0.96296296 0.96296296 1. ]
|
|
|
|
mean value: 0.9777777777777777
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.18935609 0.18282723 0.19223666 0.17152286 0.21240568 0.25662017
|
|
0.20577478 0.19005656 0.19769907 0.12422442]
|
|
|
|
mean value: 0.19227235317230223
|
|
|
|
key: score_time
|
|
value: [0.02291918 0.01264501 0.02030349 0.02279305 0.02387357 0.02354836
|
|
0.02319574 0.02317953 0.01894236 0.01210165]
|
|
|
|
mean value: 0.020350193977355956
|
|
|
|
key: test_mcc
|
|
value: [0.74535599 nan 0.46666667 0.6 0.74535599 0.6
|
|
0.77459667 0.1490712 0.46666667 0.54772256]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 0.9703421 0.9703421 0.9703421 0.9703421 1.
|
|
0.9703421 0.9703421 1. ]
|
|
|
|
mean value: 0.9822052584466778
|
|
|
|
key: test_accuracy
|
|
value: [0.875 nan 0.75 0.75 0.875 0.75
|
|
0.875 0.625 0.75 0.71428571]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 0.98591549 0.98591549 0.98591549 0.98591549
|
|
1. 0.98591549 0.98591549 1. ]
|
|
|
|
mean value: 0.9915492957746479
|
|
|
|
key: test_fscore
|
|
value: [0.8 nan 0.66666667 0.75 0.8 0.75
|
|
0.85714286 0.4 0.66666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 0.98113208 0.98113208 0.98113208 0.98113208
|
|
1. 0.98113208 0.98113208 1. ]
|
|
|
|
mean value: 0.9886792452830189
|
|
|
|
key: test_precision
|
|
value: [1. nan 0.66666667 0.6 1. 0.6
|
|
0.75 0.5 0.66666667 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.96296296 0.96296296 0.96296296 0.96296296
|
|
1. 0.96296296 0.96296296 1. ]
|
|
|
|
mean value: 0.9777777777777777
|
|
|
|
key: test_recall
|
|
value: [0.66666667 nan 0.66666667 1. 0.66666667 1.
|
|
1. 0.33333333 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 nan 0.73333333 0.8 0.83333333 0.8
|
|
0.9 0.56666667 0.73333333 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 0.98888889 0.98888889 0.98888889 0.98888889
|
|
1. 0.98888889 0.98888889 1. ]
|
|
|
|
mean value: 0.9933333333333333
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 nan 0.5 0.6 0.66666667 0.6
|
|
0.75 0.25 0.5 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 0.96296296 0.96296296 0.96296296 0.96296296
|
|
1. 0.96296296 0.96296296 1. ]
|
|
|
|
mean value: 0.9777777777777777
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02668691 0.02563429 0.02581406 0.02586079 0.02586055 0.02636909
|
|
0.02358246 0.02464199 0.02246094 0.02333188]
|
|
|
|
mean value: 0.02502429485321045
|
|
|
|
key: score_time
|
|
value: [0.01169181 0.00628114 0.00639367 0.0120008 0.01172805 0.0117023
|
|
0.01168251 0.01178622 0.01171184 0.01170659]
|
|
|
|
mean value: 0.010668492317199707
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 nan nan 0.81649658 0.6 0.81649658
|
|
0.65465367 0.81649658 0.21821789 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.91111111 0.91111111 0.88910845 0.91111111 0.91111111 0.91111111
|
|
0.93356387 0.88910845 0.91111111 0.88910845]
|
|
|
|
mean value: 0.9067555884909202
|
|
|
|
key: test_accuracy
|
|
value: [0.8 nan nan 0.9 0.8 0.9 0.8 0.9 0.6 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.95555556 0.95555556 0.94444444 0.95555556 0.95555556 0.95555556
|
|
0.96666667 0.94444444 0.95555556 0.94444444]
|
|
|
|
mean value: 0.9533333333333334
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 nan nan 0.90909091 0.8 0.88888889
|
|
0.75 0.90909091 0.66666667 0.90909091]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.95555556 0.95555556 0.94505495 0.95555556 0.95555556 0.95555556
|
|
0.96703297 0.94505495 0.95555556 0.94505495]
|
|
|
|
mean value: 0.9535531135531136
|
|
|
|
key: test_precision
|
|
value: [0.71428571 nan nan 0.83333333 0.8 1.
|
|
1. 0.83333333 0.57142857 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.95555556 0.95555556 0.93478261 0.95555556 0.95555556 0.95555556
|
|
0.95652174 0.93478261 0.95555556 0.93478261]
|
|
|
|
mean value: 0.9494202898550725
|
|
|
|
key: test_recall
|
|
value: [1. nan nan 1. 0.8 0.8 0.6 1. 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.95555556 0.95555556 0.95555556 0.95555556 0.95555556 0.95555556
|
|
0.97777778 0.95555556 0.95555556 0.95555556]
|
|
|
|
mean value: 0.9577777777777778
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 nan nan 0.9 0.8 0.9 0.8 0.9 0.6 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.95555556 0.95555556 0.94444444 0.95555556 0.95555556 0.95555556
|
|
0.96666667 0.94444444 0.95555556 0.94444444]
|
|
|
|
mean value: 0.9533333333333335
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 nan nan 0.83333333 0.66666667 0.8
|
|
0.6 0.83333333 0.5 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.91489362 0.91489362 0.89583333 0.91489362 0.91489362 0.91489362
|
|
0.93617021 0.89583333 0.91489362 0.89583333]
|
|
|
|
mean value: 0.9113031914893617
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.67839217 0.48034787 0.64701414 0.51438808 0.60679531 0.65355921
|
|
0.49431872 0.5318253 0.58624101 0.55083251]
|
|
|
|
mean value: 0.5743714332580566
|
|
|
|
key: score_time
|
|
value: [0.01309419 0.00647235 0.00659251 0.01193786 0.01207781 0.01458716
|
|
0.01210022 0.01188588 0.01342583 0.01195741]
|
|
|
|
mean value: 0.011413121223449707
|
|
|
|
key: test_mcc
|
|
value: [0.6 nan nan 0.81649658 0.40824829 0.81649658
|
|
0.65465367 0.81649658 0.40824829 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 0.93356387 0.95555556 0.97801929 0.73333333 1.
|
|
0.93356387 0.97801929 1. 0.95650071]
|
|
|
|
mean value: 0.9468555933963895
|
|
|
|
key: test_accuracy
|
|
value: [0.8 nan nan 0.9 0.7 0.9 0.8 0.9 0.7 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.96666667 0.97777778 0.98888889 0.86666667 1.
|
|
0.96666667 0.98888889 1. 0.97777778]
|
|
|
|
mean value: 0.9733333333333334
|
|
|
|
key: test_fscore
|
|
value: [0.8 nan nan 0.90909091 0.66666667 0.88888889
|
|
0.75 0.90909091 0.72727273 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 0.96703297 0.97777778 0.98901099 0.86666667 1.
|
|
0.96703297 0.98901099 1. 0.97826087]
|
|
|
|
mean value: 0.9734793226097574
|
|
|
|
key: test_precision
|
|
value: [0.8 nan nan 0.83333333 0.75 1.
|
|
1. 0.83333333 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 0.95652174 0.97777778 0.97826087 0.86666667 1.
|
|
0.95652174 0.97826087 1. 0.95744681]
|
|
|
|
mean value: 0.9671456470346387
|
|
|
|
key: test_recall
|
|
value: [0.8 nan nan 1. 0.6 0.8 0.6 1. 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 0.97777778 0.97777778 1. 0.86666667 1.
|
|
0.97777778 1. 1. 1. ]
|
|
|
|
mean value: 0.98
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 nan nan 0.9 0.7 0.9 0.8 0.9 0.7 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.96666667 0.97777778 0.98888889 0.86666667 1.
|
|
0.96666667 0.98888889 1. 0.97777778]
|
|
|
|
mean value: 0.9733333333333334
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 nan nan 0.83333333 0.5 0.8
|
|
0.6 0.83333333 0.57142857 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 0.93617021 0.95652174 0.97826087 0.76470588 1.
|
|
0.93617021 0.97826087 1. 0.95744681]
|
|
|
|
mean value: 0.9507536594656364
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01233888 0.01138854 0.01001382 0.00979853 0.00967813 0.00976253
|
|
0.00926352 0.00987411 0.00966692 0.00973201]
|
|
|
|
mean value: 0.01015169620513916
|
|
|
|
key: score_time
|
|
value: [0.01177788 0.00480366 0.00498605 0.00967026 0.00950933 0.00950623
|
|
0.00948453 0.00956655 0.00957298 0.0094316 ]
|
|
|
|
mean value: 0.008830904960632324
|
|
|
|
key: test_mcc
|
|
value: [-0.33333333 nan nan 0.2 0.5 0.21821789
|
|
0.40824829 0.5 0.21821789 0.33333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.46537892 0.44444444 0.70004007 0.73994007 0.67809594 0.641948
|
|
0.78478493 0.641948 0.82548988 0.65996633]
|
|
|
|
mean value: 0.6582036578842267
|
|
|
|
key: test_accuracy
|
|
value: [0.4 nan nan 0.6 0.7 0.6 0.7 0.7 0.6 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.7 0.67777778 0.84444444 0.86666667 0.82222222 0.8
|
|
0.88888889 0.8 0.91111111 0.81111111]
|
|
|
|
mean value: 0.8122222222222222
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 nan nan 0.6 0.76923077 0.66666667
|
|
0.66666667 0.76923077 0.66666667 0.71428571]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.76106195 0.75213675 0.85714286 0.875 0.84615385 0.83018868
|
|
0.89583333 0.83018868 0.90697674 0.83809524]
|
|
|
|
mean value: 0.8392778076441294
|
|
|
|
key: test_precision
|
|
value: [0.44444444 nan nan 0.6 0.625 0.57142857
|
|
0.75 0.625 0.57142857 0.55555556]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.63235294 0.61111111 0.79245283 0.82352941 0.74576271 0.72131148
|
|
0.84313725 0.72131148 0.95121951 0.73333333]
|
|
|
|
mean value: 0.7575522057355462
|
|
|
|
key: test_recall
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[0.8 nan nan 0.6 1. 0.8 0.6 1. 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.95555556 0.97777778 0.93333333 0.93333333 0.97777778 0.97777778
|
|
0.95555556 0.97777778 0.86666667 0.97777778]
|
|
|
|
mean value: 0.9533333333333334
|
|
|
|
key: test_roc_auc
|
|
value: [0.4 nan nan 0.6 0.7 0.6 0.7 0.7 0.6 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.7 0.67777778 0.84444444 0.86666667 0.82222222 0.8
|
|
0.88888889 0.8 0.91111111 0.81111111]
|
|
|
|
mean value: 0.8122222222222223
|
|
|
|
key: test_jcc
|
|
value: [0.4 nan nan 0.42857143 0.625 0.5
|
|
0.5 0.625 0.5 0.55555556]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.61428571 0.60273973 0.75 0.77777778 0.73333333 0.70967742
|
|
0.81132075 0.70967742 0.82978723 0.72131148]
|
|
|
|
mean value: 0.7259910854303271
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01022792 0.00995421 0.01036096 0.00926161 0.01014042 0.00990748
|
|
0.00995111 0.00963116 0.00946689 0.01010084]
|
|
|
|
mean value: 0.009900259971618652
|
|
|
|
key: score_time
|
|
value: [0.00955129 0.00466061 0.00484204 0.00874567 0.00902104 0.00952482
|
|
0.00964499 0.00959897 0.00870395 0.00953412]
|
|
|
|
mean value: 0.008382749557495118
|
|
|
|
key: test_mcc
|
|
value: [0.21821789 nan nan 0.21821789 0.6 0.40824829
|
|
0.40824829 0.65465367 0. 0.40824829]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.73624773 0.71269665 0.67082039 0.8001976 0.76026311 0.75724019
|
|
0.73333333 0.68957028 0.8001976 0.69162666]
|
|
|
|
mean value: 0.7352193554584638
|
|
|
|
key: test_accuracy
|
|
value: [0.6 nan nan 0.6 0.8 0.7 0.7 0.8 0.5 0.7]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.86666667 0.85555556 0.83333333 0.9 0.87777778 0.87777778
|
|
0.86666667 0.84444444 0.9 0.84444444]
|
|
|
|
mean value: 0.8666666666666667
|
|
|
|
key: test_fscore
|
|
value: [0.5 nan nan 0.66666667 0.8 0.66666667
|
|
0.66666667 0.83333333 0.54545455 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.87234043 0.86021505 0.84210526 0.9010989 0.88421053 0.88172043
|
|
0.86666667 0.84782609 0.8988764 0.85106383]
|
|
|
|
mean value: 0.8706123587880272
|
|
|
|
key: test_precision
|
|
value: [0.66666667 nan nan 0.57142857 0.8 0.75
|
|
0.75 0.71428571 0.5 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.83673469 0.83333333 0.8 0.89130435 0.84 0.85416667
|
|
0.86666667 0.82978723 0.90909091 0.81632653]
|
|
|
|
mean value: 0.8477410382116012
|
|
|
|
key: test_recall
|
|
value: [0.4 nan nan 0.8 0.8 0.6 0.6 1. 0.6 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.91111111 0.88888889 0.88888889 0.91111111 0.93333333 0.91111111
|
|
0.86666667 0.86666667 0.88888889 0.88888889]
|
|
|
|
mean value: 0.8955555555555555
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 nan nan 0.6 0.8 0.7 0.7 0.8 0.5 0.7]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.86666667 0.85555556 0.83333333 0.9 0.87777778 0.87777778
|
|
0.86666667 0.84444444 0.9 0.84444444]
|
|
|
|
mean value: 0.8666666666666667
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 nan nan 0.5 0.66666667 0.5
|
|
0.5 0.71428571 0.375 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.77358491 0.75471698 0.72727273 0.82 0.79245283 0.78846154
|
|
0.76470588 0.73584906 0.81632653 0.74074074]
|
|
|
|
mean value: 0.7714111193025098
|
|
|
|
MCC on Blind test: 0.03
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00974655 0.00932145 0.01000047 0.00874233 0.00839186 0.00837994
|
|
0.00840974 0.00860238 0.00835204 0.00856972]
|
|
|
|
mean value: 0.00885164737701416
|
|
|
|
key: score_time
|
|
value: [0.01500773 0.00461984 0.00566602 0.01129222 0.00940275 0.00949812
|
|
0.01562929 0.00947976 0.00929308 0.00932264]
|
|
|
|
mean value: 0.00992114543914795
|
|
|
|
key: test_mcc
|
|
value: [0.2 nan nan 0.5 0.5 0.
|
|
0.21821789 0.65465367 0. 0.6 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.69162666 0.76486616 0.62609903 0.68888889 0.67488191 0.69509522
|
|
0.64700558 0.73624773 0.76026311 0.69509522]
|
|
|
|
mean value: 0.6980069521372378
|
|
|
|
key: test_accuracy
|
|
value: [0.6 nan nan 0.7 0.7 0.5 0.6 0.8 0.5 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.84444444 0.87777778 0.81111111 0.84444444 0.83333333 0.84444444
|
|
0.82222222 0.86666667 0.87777778 0.84444444]
|
|
|
|
mean value: 0.8466666666666667
|
|
|
|
key: test_fscore
|
|
value: [0.6 nan nan 0.76923077 0.76923077 0.54545455
|
|
0.66666667 0.83333333 0.61538462 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.85106383 0.88659794 0.82105263 0.84444444 0.84536082 0.85416667
|
|
0.82978723 0.87234043 0.88421053 0.85416667]
|
|
|
|
mean value: 0.8543191187920814
|
|
|
|
key: test_precision
|
|
value: [0.6 nan nan 0.625 0.625 0.5
|
|
0.57142857 0.71428571 0.5 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.81632653 0.82692308 0.78 0.84444444 0.78846154 0.80392157
|
|
0.79591837 0.83673469 0.84 0.80392157]
|
|
|
|
mean value: 0.8136651788920697
|
|
|
|
key: test_recall
|
|
value: [0.6 nan nan 1. 1. 0.6 0.8 1. 0.8 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.88888889 0.95555556 0.86666667 0.84444444 0.91111111 0.91111111
|
|
0.86666667 0.91111111 0.93333333 0.91111111]
|
|
|
|
mean value: 0.9
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 nan nan 0.7 0.7 0.5 0.6 0.8 0.5 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.84444444 0.87777778 0.81111111 0.84444444 0.83333333 0.84444444
|
|
0.82222222 0.86666667 0.87777778 0.84444444]
|
|
|
|
mean value: 0.8466666666666667
|
|
|
|
key: test_jcc
|
|
value: [0.42857143 nan nan 0.625 0.625 0.375
|
|
0.5 0.71428571 0.44444444 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.74074074 0.7962963 0.69642857 0.73076923 0.73214286 0.74545455
|
|
0.70909091 0.77358491 0.79245283 0.74545455]
|
|
|
|
mean value: 0.7462415432226753
|
|
|
|
MCC on Blind test: -0.04
|
|
|
|
Accuracy on Blind test: 0.48
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01139522 0.00962234 0.0094831 0.00941205 0.0092721 0.00994563
|
|
0.00933862 0.00987744 0.00951862 0.00935507]
|
|
|
|
mean value: 0.009722018241882324
|
|
|
|
key: score_time
|
|
value: [0.00916266 0.00445056 0.00433707 0.00926876 0.00877261 0.00935793
|
|
0.00875735 0.00887942 0.00921822 0.00887632]
|
|
|
|
mean value: 0.008108091354370118
|
|
|
|
key: test_mcc
|
|
value: [0.2 nan nan 0.81649658 0.65465367 0.81649658
|
|
0.40824829 0.5 0.21821789 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.82548988 0.84465303 0.84465303 0.8230355 0.88910845 0.86666667
|
|
0.86666667 0.86666667 0.89087081 0.82222222]
|
|
|
|
mean value: 0.8540032913422
|
|
|
|
key: test_accuracy
|
|
value: [0.6 nan nan 0.9 0.8 0.9 0.7 0.7 0.6 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.91111111 0.92222222 0.92222222 0.91111111 0.94444444 0.93333333
|
|
0.93333333 0.93333333 0.94444444 0.91111111]
|
|
|
|
mean value: 0.9266666666666666
|
|
|
|
key: test_fscore
|
|
value: [0.6 nan nan 0.90909091 0.83333333 0.88888889
|
|
0.66666667 0.76923077 0.66666667 0.88888889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.90697674 0.92307692 0.92307692 0.90909091 0.94505495 0.93333333
|
|
0.93333333 0.93333333 0.94252874 0.91111111]
|
|
|
|
mean value: 0.9260916291229042
|
|
|
|
key: test_precision
|
|
value: [0.6 nan nan 0.83333333 0.71428571 1.
|
|
0.75 0.625 0.57142857 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.95121951 0.91304348 0.91304348 0.93023256 0.93478261 0.93333333
|
|
0.93333333 0.93333333 0.97619048 0.91111111]
|
|
|
|
mean value: 0.9329623222853636
|
|
|
|
key: test_recall
|
|
value: [0.6 nan nan 1. 1. 0.8 0.6 1. 0.8 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.86666667 0.93333333 0.93333333 0.88888889 0.95555556 0.93333333
|
|
0.93333333 0.93333333 0.91111111 0.91111111]
|
|
|
|
mean value: 0.92
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 nan nan 0.9 0.8 0.9 0.7 0.7 0.6 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.91111111 0.92222222 0.92222222 0.91111111 0.94444444 0.93333333
|
|
0.93333333 0.93333333 0.94444444 0.91111111]
|
|
|
|
mean value: 0.9266666666666666
|
|
|
|
key: test_jcc
|
|
value: [0.42857143 nan nan 0.83333333 0.71428571 0.8
|
|
0.5 0.625 0.5 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.82978723 0.85714286 0.85714286 0.83333333 0.89583333 0.875
|
|
0.875 0.875 0.89130435 0.83673469]
|
|
|
|
mean value: 0.8626278656698572
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.37492514 0.37046218 0.38234639 0.44610929 0.35609388 0.35016608
|
|
0.38355708 0.37324095 0.34541011 0.59514403]
|
|
|
|
mean value: 0.39774551391601565
|
|
|
|
key: score_time
|
|
value: [0.01198101 0.00661445 0.00665283 0.01227736 0.01198196 0.0120244
|
|
0.01200557 0.01206875 0.01202154 0.0120492 ]
|
|
|
|
mean value: 0.010967707633972168
|
|
|
|
key: test_mcc
|
|
value: [0.2 nan nan 0.5 0.6 0.81649658
|
|
0.40824829 0.65465367 0.21821789 0.6 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.6 nan nan 0.7 0.8 0.9 0.7 0.8 0.6 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.6 nan nan 0.76923077 0.8 0.88888889
|
|
0.66666667 0.83333333 0.66666667 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.6 nan nan 0.625 0.8 1.
|
|
0.75 0.71428571 0.57142857 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.6 nan nan 1. 0.8 0.8 0.6 1. 0.8 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 nan nan 0.7 0.8 0.9 0.7 0.8 0.6 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[0.42857143 nan nan 0.625 0.66666667 0.8
|
|
0.5 0.71428571 0.5 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01511621 0.01407719 0.01127934 0.01145411 0.01078749 0.01093793
|
|
0.01074314 0.01044631 0.01040411 0.01171279]
|
|
|
|
mean value: 0.01169586181640625
|
|
|
|
key: score_time
|
|
value: [0.01174808 0.00465941 0.00485921 0.00918746 0.00877357 0.00866652
|
|
0.00861716 0.0088768 0.00946498 0.00948787]
|
|
|
|
mean value: 0.008434104919433593
|
|
|
|
key: test_mcc
|
|
value: [1. nan nan 0.6 0.81649658 0.6
|
|
0.65465367 0.2 0.40824829 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. nan nan 0.8 0.9 0.8 0.8 0.6 0.7 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. nan nan 0.8 0.90909091 0.8
|
|
0.75 0.6 0.72727273 0.88888889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. nan nan 0.8 0.83333333 0.8
|
|
1. 0.6 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. nan nan 0.8 1. 0.8 0.6 0.6 0.8 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. nan nan 0.8 0.9 0.8 0.8 0.6 0.7 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. nan nan 0.66666667 0.83333333 0.66666667
|
|
0.6 0.42857143 0.57142857 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.68
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08262682 0.08500862 0.0852046 0.08543134 0.08545947 0.08390474
|
|
0.08447599 0.08485103 0.08712196 0.08414125]
|
|
|
|
mean value: 0.08482258319854737
|
|
|
|
key: score_time
|
|
value: [0.01772976 0.0046699 0.00469708 0.01746058 0.01755953 0.01747441
|
|
0.01821804 0.01745749 0.01838636 0.01701069]
|
|
|
|
mean value: 0.015066385269165039
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 nan nan 0.6 0.81649658 0.65465367
|
|
0.2 0.5 0.21821789 0.65465367]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 nan nan 0.8 0.9 0.8 0.6 0.7 0.6 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 nan nan 0.8 0.90909091 0.75
|
|
0.6 0.76923077 0.66666667 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. nan nan 0.8 0.83333333 1.
|
|
0.6 0.625 0.57142857 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8 nan nan 0.8 1. 0.6 0.6 1. 0.8 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 nan nan 0.8 0.9 0.8 0.6 0.7 0.6 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.8 nan nan 0.66666667 0.83333333 0.6
|
|
0.42857143 0.625 0.5 0.6 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00895452 0.00875449 0.00881481 0.00910234 0.00885272 0.00899577
|
|
0.00944829 0.00882363 0.00928545 0.00872445]
|
|
|
|
mean value: 0.008975648880004882
|
|
|
|
key: score_time
|
|
value: [0.00846648 0.00439858 0.00429606 0.00868988 0.00855374 0.00891685
|
|
0.0090065 0.00850105 0.00932431 0.00891042]
|
|
|
|
mean value: 0.007906389236450196
|
|
|
|
key: test_mcc
|
|
value: [0.40824829 nan nan 0.40824829 0.65465367 0.65465367
|
|
0.40824829 0.33333333 0.81649658 0.6 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.7 nan nan 0.7 0.8 0.8 0.7 0.6 0.9 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 nan nan 0.66666667 0.75 0.75
|
|
0.66666667 0.71428571 0.88888889 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 nan nan 0.75 1. 1.
|
|
0.75 0.55555556 1. 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.6 nan nan 0.6 0.6 0.6 0.6 1. 0.8 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 nan nan 0.7 0.8 0.8 0.7 0.6 0.9 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 nan nan 0.5 0.6 0.6
|
|
0.5 0.55555556 0.8 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.06
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.05830765 1.05878472 1.03069663 1.03076839 1.05383444 1.04063582
|
|
1.02942109 1.03039336 1.04405308 1.04590487]
|
|
|
|
mean value: 1.0422800064086915
|
|
|
|
key: score_time
|
|
value: [0.09411407 0.00471926 0.00459909 0.09293795 0.09200311 0.08624506
|
|
0.08817077 0.08822632 0.08993959 0.09330368]
|
|
|
|
mean value: 0.07342588901519775
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 nan nan 0.65465367 0.81649658 0.81649658
|
|
0.65465367 0.65465367 0.40824829 0.65465367]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 nan nan 0.8 0.9 0.9 0.8 0.8 0.7 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 nan nan 0.75 0.90909091 0.88888889
|
|
0.75 0.83333333 0.72727273 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. nan nan 1. 0.83333333 1.
|
|
1. 0.71428571 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8 nan nan 0.6 1. 0.8 0.6 1. 0.8 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 nan nan 0.8 0.9 0.9 0.8 0.8 0.7 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.8 nan nan 0.6 0.83333333 0.8
|
|
0.6 0.71428571 0.57142857 0.6 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.54
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.85999107 0.87137699 0.85217786 0.87492132 0.86495948 0.84439301
|
|
0.88340187 0.85964608 0.85427666 0.96079803]
|
|
|
|
mean value: 0.8725942373275757
|
|
|
|
key: score_time
|
|
value: [0.2267487 0.00457191 0.00463676 0.16841388 0.15269232 0.22285342
|
|
0.1705575 0.19267631 0.23203325 0.16924214]
|
|
|
|
mean value: 0.1544426202774048
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 nan nan 0.65465367 1. 0.81649658
|
|
0.65465367 0.65465367 0.40824829 0.65465367]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 0.97801929 0.97801929 0.95555556 0.97801929 0.97801929
|
|
0.97801929 0.97801929 1. 0.97801929]
|
|
|
|
mean value: 0.9801690612461116
|
|
|
|
key: test_accuracy
|
|
value: [0.9 nan nan 0.8 1. 0.9 0.8 0.8 0.7 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.98888889 0.98888889 0.97777778 0.98888889 0.98888889
|
|
0.98888889 0.98888889 1. 0.98888889]
|
|
|
|
mean value: 0.99
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 nan nan 0.75 1. 0.88888889
|
|
0.75 0.83333333 0.72727273 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 0.98901099 0.98901099 0.97777778 0.98901099 0.98876404
|
|
0.98901099 0.98901099 1. 0.98901099]
|
|
|
|
mean value: 0.9900607756787532
|
|
|
|
key: test_precision
|
|
value: [0.83333333 nan nan 1. 1. 1.
|
|
1. 0.71428571 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 0.97826087 0.97826087 0.97777778 0.97826087 1.
|
|
0.97826087 0.97826087 1. 0.97826087]
|
|
|
|
mean value: 0.9847342995169082
|
|
|
|
key: test_recall
|
|
value: [1. nan nan 0.6 1. 0.8 0.6 1. 0.8 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 0.97777778 1. 0.97777778
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9955555555555555
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 nan nan 0.8 1. 0.9 0.8 0.8 0.7 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.98888889 0.98888889 0.97777778 0.98888889 0.98888889
|
|
0.98888889 0.98888889 1. 0.98888889]
|
|
|
|
mean value: 0.99
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 nan nan 0.6 1. 0.8
|
|
0.6 0.71428571 0.57142857 0.6 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 0.97826087 0.97826087 0.95652174 0.97826087 0.97777778
|
|
0.97826087 0.97826087 1. 0.97826087]
|
|
|
|
mean value: 0.9803864734299517
|
|
|
|
MCC on Blind test: 0.6
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02133918 0.00901937 0.00918055 0.00923777 0.00924492 0.00955749
|
|
0.00870299 0.00861335 0.00901318 0.00863385]
|
|
|
|
mean value: 0.010254263877868652
|
|
|
|
key: score_time
|
|
value: [0.01596808 0.00419235 0.00490522 0.00853872 0.0092361 0.00877595
|
|
0.00848365 0.00854468 0.00853395 0.00849319]
|
|
|
|
mean value: 0.008567190170288086
|
|
|
|
key: test_mcc
|
|
value: [0.21821789 nan nan 0.21821789 0.6 0.40824829
|
|
0.40824829 0.65465367 0. 0.40824829]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.73624773 0.71269665 0.67082039 0.8001976 0.76026311 0.75724019
|
|
0.73333333 0.68957028 0.8001976 0.69162666]
|
|
|
|
mean value: 0.7352193554584638
|
|
|
|
key: test_accuracy
|
|
value: [0.6 nan nan 0.6 0.8 0.7 0.7 0.8 0.5 0.7]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.86666667 0.85555556 0.83333333 0.9 0.87777778 0.87777778
|
|
0.86666667 0.84444444 0.9 0.84444444]
|
|
|
|
mean value: 0.8666666666666667
|
|
|
|
key: test_fscore
|
|
value: [0.5 nan nan 0.66666667 0.8 0.66666667
|
|
0.66666667 0.83333333 0.54545455 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.87234043 0.86021505 0.84210526 0.9010989 0.88421053 0.88172043
|
|
0.86666667 0.84782609 0.8988764 0.85106383]
|
|
|
|
mean value: 0.8706123587880272
|
|
|
|
key: test_precision
|
|
value: [0.66666667 nan nan 0.57142857 0.8 0.75
|
|
0.75 0.71428571 0.5 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[0.83673469 0.83333333 0.8 0.89130435 0.84 0.85416667
|
|
0.86666667 0.82978723 0.90909091 0.81632653]
|
|
|
|
mean value: 0.8477410382116012
|
|
|
|
key: test_recall
|
|
value: [0.4 nan nan 0.8 0.8 0.6 0.6 1. 0.6 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.91111111 0.88888889 0.88888889 0.91111111 0.93333333 0.91111111
|
|
0.86666667 0.86666667 0.88888889 0.88888889]
|
|
|
|
mean value: 0.8955555555555555
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 nan nan 0.6 0.8 0.7 0.7 0.8 0.5 0.7]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.86666667 0.85555556 0.83333333 0.9 0.87777778 0.87777778
|
|
0.86666667 0.84444444 0.9 0.84444444]
|
|
|
|
mean value: 0.8666666666666667
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 nan nan 0.5 0.66666667 0.5
|
|
0.5 0.71428571 0.375 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.77358491 0.75471698 0.72727273 0.82 0.79245283 0.78846154
|
|
0.76470588 0.73584906 0.81632653 0.74074074]
|
|
|
|
mean value: 0.7714111193025098
|
|
|
|
MCC on Blind test: 0.03
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.0385325 0.07943583 0.07845998 0.04148769 0.03596449 0.03703904
|
|
0.07786202 0.07349586 0.03462934 0.08684826]
|
|
|
|
mean value: 0.05837550163269043
|
|
|
|
key: score_time
|
|
value: [0.01043248 0.00514793 0.00486374 0.01063561 0.01104069 0.01106238
|
|
0.01134038 0.0113461 0.01019478 0.01071811]
|
|
|
|
mean value: 0.009678220748901368
|
|
|
|
key: test_mcc
|
|
value: [1. nan nan 1. 1. 0.6
|
|
0.81649658 0.6 0.5 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. nan nan 1. 1. 0.8 0.9 0.8 0.7 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. nan nan 1. 1. 0.8
|
|
0.88888889 0.8 0.76923077 0.88888889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. nan nan 1. 1. 0.8 1. 0.8 0.625 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. nan nan 1. 1. 0.8 0.8 0.8 1. 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. nan nan 1. 1. 0.8 0.9 0.8 0.7 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. nan nan 1. 1. 0.66666667
|
|
0.8 0.66666667 0.625 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.89
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03867555 0.02319288 0.02706194 0.02529454 0.05284071 0.11862731
|
|
0.03852725 0.04383326 0.03280854 0.04009724]
|
|
|
|
mean value: 0.04409592151641846
|
|
|
|
key: score_time
|
|
value: [0.0272522 0.00630307 0.00678635 0.01231098 0.02257872 0.02594495
|
|
0.01236582 0.01228881 0.02403021 0.02088833]
|
|
|
|
mean value: 0.017074942588806152
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 nan nan 0.65465367 0.81649658 0.2
|
|
0.21821789 0.6 0.40824829 0.40824829]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 nan nan 0.8 0.9 0.6 0.6 0.8 0.7 0.7]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 nan nan 0.83333333 0.90909091 0.6
|
|
0.66666667 0.8 0.72727273 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.71428571 nan nan 0.71428571 0.83333333 0.6
|
|
0.57142857 0.8 0.66666667 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. nan nan 1. 1. 0.6 0.8 0.8 0.8 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 nan nan 0.8 0.9 0.6 0.6 0.8 0.7 0.7]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 nan nan 0.71428571 0.83333333 0.42857143
|
|
0.5 0.66666667 0.57142857 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.18
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[0.02235413 0.01077485 0.01104784 0.00940537 0.00992942 0.00974631
|
|
0.01046252 0.01258326 0.00965023 0.00989842]
|
|
|
|
mean value: 0.011585235595703125
|
|
|
|
key: score_time
|
|
value: [0.010818 0.0060277 0.00485873 0.01032233 0.00985336 0.0115025
|
|
0.00923038 0.00966311 0.01026726 0.01118207]
|
|
|
|
mean value: 0.009372544288635255
|
|
|
|
key: test_mcc
|
|
value: [0. nan nan 0.40824829 0.2 0.81649658
|
|
0.40824829 0.81649658 0.21821789 0.6 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.68957028 0.69162666 0.67082039 0.68957028 0.69162666 0.64508188
|
|
0.73405869 0.68957028 0.75724019 0.6681531 ]
|
|
|
|
mean value: 0.6927318416110331
|
|
|
|
key: test_accuracy
|
|
value: [0.5 nan nan 0.7 0.6 0.9 0.7 0.9 0.6 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.84444444 0.84444444 0.83333333 0.84444444 0.84444444 0.82222222
|
|
0.86666667 0.84444444 0.87777778 0.83333333]
|
|
|
|
mean value: 0.8455555555555556
|
|
|
|
key: test_fscore
|
|
value: [0.54545455 nan nan 0.72727273 0.6 0.88888889
|
|
0.66666667 0.90909091 0.66666667 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.84090909 0.85106383 0.84210526 0.84782609 0.85106383 0.82608696
|
|
0.86956522 0.84782609 0.87356322 0.83870968]
|
|
|
|
mean value: 0.84887192572777
|
|
|
|
key: test_precision
|
|
value: [0.5 nan nan 0.66666667 0.6 1.
|
|
0.75 0.83333333 0.57142857 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.86046512 0.81632653 0.8 0.82978723 0.81632653 0.80851064
|
|
0.85106383 0.82978723 0.9047619 0.8125 ]
|
|
|
|
mean value: 0.8329529018435677
|
|
|
|
key: test_recall
|
|
value: [0.6 nan nan 0.8 0.6 0.8 0.6 1. 0.8 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.82222222 0.88888889 0.88888889 0.86666667 0.88888889 0.84444444
|
|
0.88888889 0.86666667 0.84444444 0.86666667]
|
|
|
|
mean value: 0.8666666666666667
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 nan nan 0.7 0.6 0.9 0.7 0.9 0.6 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.84444444 0.84444444 0.83333333 0.84444444 0.84444444 0.82222222
|
|
0.86666667 0.84444444 0.87777778 0.83333333]
|
|
|
|
mean value: 0.8455555555555556
|
|
|
|
key: test_jcc
|
|
value: [0.375 nan nan 0.57142857 0.42857143 0.8
|
|
0.5 0.83333333 0.5 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.7254902 0.74074074 0.72727273 0.73584906 0.74074074 0.7037037
|
|
0.76923077 0.73584906 0.7755102 0.72222222]
|
|
|
|
mean value: 0.7376609417278515
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01186442 0.01330304 0.01402235 0.01382852 0.01425862 0.01367593
|
|
0.01314926 0.01479983 0.01317763 0.01350975]
|
|
|
|
mean value: 0.01355893611907959
|
|
|
|
key: score_time
|
|
value: [0.0099504 0.00586057 0.00639057 0.01186395 0.01181006 0.01167011
|
|
0.01159191 0.01164412 0.01165104 0.01212764]
|
|
|
|
mean value: 0.010456037521362305
|
|
|
|
key: test_mcc
|
|
value: [0.40824829 nan nan 0.81649658 0.6 0.81649658
|
|
0.40824829 0.65465367 0.21821789 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.95555556 0.93541435 0.93541435 1. 0.95650071 0.97801929
|
|
0.93541435 0.97801929 0.95555556 0.93541435]
|
|
|
|
mean value: 0.9565307800167633
|
|
|
|
key: test_accuracy
|
|
value: [0.7 nan nan 0.9 0.8 0.9 0.7 0.8 0.6 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.97777778 0.96666667 0.96666667 1. 0.97777778 0.98888889
|
|
0.96666667 0.98888889 0.97777778 0.96666667]
|
|
|
|
mean value: 0.9777777777777777
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 nan nan 0.90909091 0.8 0.88888889
|
|
0.66666667 0.83333333 0.66666667 0.90909091]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.97777778 0.96774194 0.96774194 1. 0.97826087 0.98901099
|
|
0.96774194 0.98876404 0.97777778 0.96774194]
|
|
|
|
mean value: 0.9782559201011066
|
|
|
|
key: test_precision
|
|
value: [0.66666667 nan nan 0.83333333 0.8 1.
|
|
0.75 0.71428571 0.57142857 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.97777778 0.9375 0.9375 1. 0.95744681 0.97826087
|
|
0.9375 1. 0.97777778 0.9375 ]
|
|
|
|
mean value: 0.9641263233631411
|
|
|
|
key: test_recall
|
|
value: [0.8 nan nan 1. 0.8 0.8 0.6 1. 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.97777778 1. 1. 1. 1. 1.
|
|
1. 0.97777778 0.97777778 1. ]
|
|
|
|
mean value: 0.9933333333333333
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 nan nan 0.9 0.8 0.9 0.7 0.8 0.6 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.97777778 0.96666667 0.96666667 1. 0.97777778 0.98888889
|
|
0.96666667 0.98888889 0.97777778 0.96666667]
|
|
|
|
mean value: 0.9777777777777777
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 nan nan 0.83333333 0.66666667 0.8
|
|
0.5 0.71428571 0.5 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.95652174 0.9375 0.9375 1. 0.95744681 0.97826087
|
|
0.9375 0.97777778 0.95652174 0.9375 ]
|
|
|
|
mean value: 0.9576528934114503
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01379657 0.01267099 0.01356316 0.01448107 0.01277828 0.01268101
|
|
0.01251578 0.01251745 0.01239395 0.01302099]
|
|
|
|
mean value: 0.013041925430297852
|
|
|
|
key: score_time
|
|
value: [0.01063323 0.00612164 0.00623727 0.01173258 0.01149225 0.01172876
|
|
0.01138878 0.0114789 0.0114975 0.01158094]
|
|
|
|
mean value: 0.010389184951782227
|
|
|
|
key: test_mcc
|
|
value: [0. nan nan 0.40824829 0.6 0.81649658
|
|
0.40824829 1. 0.21821789 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.83553169 0.65465367 0.85485041 0.70710678 0.97801929 0.97801929
|
|
0.93541435 0.89442719 0.97801929 0.6894997 ]
|
|
|
|
mean value: 0.8505541676741646
|
|
|
|
key: test_accuracy
|
|
value: [0.5 nan nan 0.7 0.8 0.9 0.7 1. 0.6 0.7]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.91111111 0.8 0.92222222 0.83333333 0.98888889 0.98888889
|
|
0.96666667 0.94444444 0.98888889 0.82222222]
|
|
|
|
mean value: 0.9166666666666667
|
|
|
|
key: test_fscore
|
|
value: [0.44444444 nan nan 0.66666667 0.8 0.88888889
|
|
0.66666667 1. 0.66666667 0.76923077]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.90243902 0.75 0.91566265 0.8 0.98876404 0.98876404
|
|
0.96774194 0.94117647 0.98901099 0.8490566 ]
|
|
|
|
mean value: 0.9092615763736974
|
|
|
|
key: test_precision
|
|
value: [0.5 nan nan 0.75 0.8 1.
|
|
0.75 1. 0.57142857 0.625 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.9375 1. 0.97826087 0.73770492]
|
|
|
|
mean value: 0.9653465787598005
|
|
|
|
key: test_recall
|
|
value: [0.4 nan nan 0.6 0.8 0.8 0.6 1. 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.82222222 0.6 0.84444444 0.66666667 0.97777778 0.97777778
|
|
1. 0.88888889 1. 1. ]
|
|
|
|
mean value: 0.8777777777777778
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 nan nan 0.7 0.8 0.9 0.7 1. 0.6 0.7]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.91111111 0.8 0.92222222 0.83333333 0.98888889 0.98888889
|
|
0.96666667 0.94444444 0.98888889 0.82222222]
|
|
|
|
mean value: 0.9166666666666666
|
|
|
|
key: test_jcc
|
|
value: [0.28571429 nan nan 0.5 0.66666667 0.8
|
|
0.5 1. 0.5 0.625 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.82222222 0.6 0.84444444 0.66666667 0.97777778 0.97777778
|
|
0.9375 0.88888889 0.97826087 0.73770492]
|
|
|
|
mean value: 0.8431243565375782
|
|
|
|
MCC on Blind test: 0.01
|
|
|
|
Accuracy on Blind test: 0.38
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.12627959 0.11029863 0.10071945 0.09155011 0.08990955 0.09815741
|
|
0.09487963 0.10431266 0.08676529 0.08815813]
|
|
|
|
mean value: 0.09910304546356201
|
|
|
|
key: score_time
|
|
value: [0.0188818 0.0063622 0.00565004 0.01617861 0.01684022 0.01686287
|
|
0.01757121 0.01591206 0.0148592 0.01506853]
|
|
|
|
mean value: 0.014418673515319825
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 nan nan 1. 1. 0.6
|
|
1. 0.81649658 0.65465367 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 nan nan 1. 1. 0.8 1. 0.9 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 nan nan 1. 1. 0.8
|
|
1. 0.90909091 0.83333333 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.71428571 nan nan 1. 1. 0.8
|
|
1. 0.83333333 0.71428571 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. nan nan 1. 1. 0.8 1. 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 nan nan 1. 1. 0.8 1. 0.9 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 nan nan 1. 1. 0.66666667
|
|
1. 0.83333333 0.71428571 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03483057 0.03842282 0.03175402 0.02523565 0.04487014 0.03826332
|
|
0.03274846 0.04900002 0.04477096 0.03169918]
|
|
|
|
mean value: 0.037159514427185056
|
|
|
|
key: score_time
|
|
value: [0.02208352 0.00828576 0.00562572 0.02282691 0.02469707 0.02239871
|
|
0.03312135 0.02478552 0.02329421 0.02396297]
|
|
|
|
mean value: 0.021108174324035646
|
|
|
|
key: test_mcc
|
|
value: [1. nan nan 1. 1. 0.6
|
|
0.81649658 0.81649658 0.6 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.97801929 0.97801929 0.97801929 1. 1. 0.97801929
|
|
1. 1. 1. 0.97801929]
|
|
|
|
mean value: 0.9890096469218257
|
|
|
|
key: test_accuracy
|
|
value: [1. nan nan 1. 1. 0.8 0.9 0.9 0.8 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.98888889 0.98888889 0.98888889 1. 1. 0.98888889
|
|
1. 1. 1. 0.98888889]
|
|
|
|
mean value: 0.9944444444444445
|
|
|
|
key: test_fscore
|
|
value: [1. nan nan 1. 1. 0.8
|
|
0.88888889 0.88888889 0.8 0.88888889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.98876404 0.98876404 0.98876404 1. 1. 0.98876404
|
|
1. 1. 1. 0.98876404]
|
|
|
|
mean value: 0.9943820224719101
|
|
|
|
key: test_precision
|
|
value: [1. nan nan 1. 1. 0.8 1. 1. 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. nan nan 1. 1. 0.8 0.8 0.8 0.8 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.97777778 0.97777778 0.97777778 1. 1. 0.97777778
|
|
1. 1. 1. 0.97777778]
|
|
|
|
mean value: 0.9888888888888889
|
|
|
|
key: test_roc_auc
|
|
value: [1. nan nan 1. 1. 0.8 0.9 0.9 0.8 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.98888889 0.98888889 0.98888889 1. 1. 0.98888889
|
|
1. 1. 1. 0.98888889]
|
|
|
|
mean value: 0.9944444444444445
|
|
|
|
key: test_jcc
|
|
value: [1. nan nan 1. 1. 0.66666667
|
|
0.8 0.8 0.66666667 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[0.97777778 0.97777778 0.97777778 1. 1. 0.97777778
|
|
1. 1. 1. 0.97777778]
|
|
|
|
mean value: 0.9888888888888889
|
|
|
|
MCC on Blind test: 1.0
|
|
|
|
Accuracy on Blind test: 1.0
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01488805 0.01686454 0.02178621 0.01625896 0.0167253 0.01618171
|
|
0.01634598 0.02672648 0.02222109 0.02587366]
|
|
|
|
mean value: 0.019387197494506837
|
|
|
|
key: score_time
|
|
value: [0.0114007 0.00593019 0.00617599 0.01183033 0.01183867 0.01179862
|
|
0.01180983 0.01191282 0.01200986 0.01189065]
|
|
|
|
mean value: 0.01065976619720459
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 nan nan 0.40824829 0.40824829 0.65465367
|
|
0.2 0.5 0. 0.65465367]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 nan nan 0.7 0.7 0.8 0.6 0.7 0.5 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 nan nan 0.72727273 0.72727273 0.75
|
|
0.6 0.76923077 0.61538462 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 nan nan 0.66666667 0.66666667 1.
|
|
0.6 0.625 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. nan nan 0.8 0.8 0.6 0.6 1. 0.8 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 nan nan 0.7 0.7 0.8 0.6 0.7 0.5 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 nan nan 0.57142857 0.57142857 0.6
|
|
0.42857143 0.625 0.44444444 0.6 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.23585057 0.21870351 0.2274735 0.20173454 0.19497991 0.21246147
|
|
0.19365239 0.22760653 0.19109488 0.20255446]
|
|
|
|
mean value: 0.2106111764907837
|
|
|
|
key: score_time
|
|
value: [0.01025057 0.00483108 0.00519347 0.00991392 0.00999832 0.00909162
|
|
0.01007318 0.00993586 0.00930429 0.01151347]
|
|
|
|
mean value: 0.009010577201843261
|
|
|
|
key: test_mcc
|
|
value: [1. nan nan 0.40824829 0.81649658 0.6
|
|
1. 0.81649658 0.40824829 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. nan nan 0.7 0.9 0.8 1. 0.9 0.7 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. nan nan 0.66666667 0.88888889 0.8
|
|
1. 0.88888889 0.72727273 0.88888889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. nan nan 0.75 1. 0.8
|
|
1. 1. 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. nan nan 0.6 0.8 0.8 1. 0.8 0.8 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. nan nan 0.7 0.9 0.8 1. 0.9 0.7 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. nan nan 0.5 0.8 0.66666667
|
|
1. 0.8 0.57142857 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.84
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01237321 0.014323 0.01440096 0.01429844 0.0142436 0.01766992
|
|
0.01455665 0.01430321 0.01442313 0.01517463]
|
|
|
|
mean value: 0.01457667350769043
|
|
|
|
key: score_time
|
|
value: [0.01152301 0.00598216 0.00594306 0.01162028 0.01163912 0.01281118
|
|
0.01165009 0.01300716 0.01285219 0.0118072 ]
|
|
|
|
mean value: 0.010883545875549317
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 nan nan 0.21821789 0.81649658 0.40824829
|
|
0.6 0.65465367 0.40824829 0.21821789]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 nan nan 0.6 0.9 0.7 0.8 0.8 0.7 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 nan nan 0.66666667 0.90909091 0.66666667
|
|
0.8 0.83333333 0.72727273 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.71428571 nan nan 0.57142857 0.83333333 0.75
|
|
0.8 0.71428571 0.66666667 0.57142857]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. nan nan 0.8 1. 0.6 0.8 1. 0.8 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 nan nan 0.6 0.9 0.7 0.8 0.8 0.7 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 nan nan 0.5 0.83333333 0.5
|
|
0.66666667 0.71428571 0.57142857 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.14
|
|
|
|
Accuracy on Blind test: 0.42
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03715086 0.01295328 0.01913691 0.02935195 0.03285313 0.05956721
|
|
0.03427505 0.03019619 0.02694535 0.03023982]
|
|
|
|
mean value: 0.031266975402832034
|
|
|
|
key: score_time
|
|
value: [0.01183128 0.00623417 0.00630283 0.02030396 0.02208257 0.03155112
|
|
0.02232051 0.01882124 0.02124667 0.02309394]
|
|
|
|
mean value: 0.018378829956054686
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 nan nan 0.65465367 0.81649658 0.81649658
|
|
0.40824829 0.81649658 0.21821789 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 0.97801929 0.97801929 1. 0.97801929 1.
|
|
0.97801929 0.97801929 1. 0.97801929]
|
|
|
|
mean value: 0.9868115763061909
|
|
|
|
key: test_accuracy
|
|
value: [0.9 nan nan 0.8 0.9 0.9 0.7 0.9 0.6 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.98888889 0.98888889 1. 0.98888889 1.
|
|
0.98888889 0.98888889 1. 0.98888889]
|
|
|
|
mean value: 0.9933333333333334
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 nan nan 0.83333333 0.90909091 0.88888889
|
|
0.66666667 0.90909091 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 0.98901099 0.98901099 1. 0.98901099 1.
|
|
0.98901099 0.98901099 1. 0.98901099]
|
|
|
|
mean value: 0.9934065934065934
|
|
|
|
key: test_precision
|
|
value: [0.83333333 nan nan 0.71428571 0.83333333 1.
|
|
0.75 0.83333333 0.57142857 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 0.97826087 0.97826087 1. 0.97826087 1.
|
|
0.97826087 0.97826087 1. 0.97826087]
|
|
|
|
mean value: 0.9869565217391304
|
|
|
|
key: test_recall
|
|
value: [1. nan nan 1. 1. 0.8 0.6 1. 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 nan nan 0.8 0.9 0.9 0.7 0.9 0.6 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.98888889 0.98888889 1. 0.98888889 1.
|
|
0.98888889 0.98888889 1. 0.98888889]
|
|
|
|
mean value: 0.9933333333333333
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 nan nan 0.71428571 0.83333333 0.8
|
|
0.5 0.83333333 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 0.97826087 0.97826087 1. 0.97826087 1.
|
|
0.97826087 0.97826087 1. 0.97826087]
|
|
|
|
mean value: 0.9869565217391304
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:128: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:131: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.10745692 0.17037439 0.22704196 0.24130416 0.19570732 0.18806911
|
|
0.18574762 0.18309569 0.18836188 0.18505669]
|
|
|
|
mean value: 0.18722157478332518
|
|
|
|
key: score_time
|
|
value: [0.01179934 0.01459885 0.01264095 0.02238703 0.02003407 0.02027035
|
|
0.01176071 0.02208042 0.02279568 0.02302146]
|
|
|
|
mean value: 0.018138885498046875
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 nan nan 0.65465367 0.81649658 0.81649658
|
|
0.40824829 0.81649658 0.21821789 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 0.97801929 0.97801929 1. 0.97801929 1.
|
|
0.97801929 0.97801929 1. 0.97801929]
|
|
|
|
mean value: 0.9868115763061909
|
|
|
|
key: test_accuracy
|
|
value: [0.9 nan nan 0.8 0.9 0.9 0.7 0.9 0.6 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.98888889 0.98888889 1. 0.98888889 1.
|
|
0.98888889 0.98888889 1. 0.98888889]
|
|
|
|
mean value: 0.9933333333333334
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 nan nan 0.83333333 0.90909091 0.88888889
|
|
0.66666667 0.90909091 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 0.98901099 0.98901099 1. 0.98901099 1.
|
|
0.98901099 0.98901099 1. 0.98901099]
|
|
|
|
mean value: 0.9934065934065934
|
|
|
|
key: test_precision
|
|
value: [0.83333333 nan nan 0.71428571 0.83333333 1.
|
|
0.75 0.83333333 0.57142857 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 0.97826087 0.97826087 1. 0.97826087 1.
|
|
0.97826087 0.97826087 1. 0.97826087]
|
|
|
|
mean value: 0.9869565217391304
|
|
|
|
key: test_recall
|
|
value: [1. nan nan 1. 1. 0.8 0.6 1. 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 nan nan 0.8 0.9 0.9 0.7 0.9 0.6 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.98888889 0.98888889 1. 0.98888889 1.
|
|
0.98888889 0.98888889 1. 0.98888889]
|
|
|
|
mean value: 0.9933333333333333
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 nan nan 0.71428571 0.83333333 0.8
|
|
0.5 0.83333333 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 0.97826087 0.97826087 1. 0.97826087 1.
|
|
0.97826087 0.97826087 1. 0.97826087]
|
|
|
|
mean value: 0.9869565217391304
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03232861 0.02856731 0.02625203 0.02819514 0.0350008 0.03808117
|
|
0.03861642 0.03147697 0.03068471 0.02853966]
|
|
|
|
mean value: 0.031774282455444336
|
|
|
|
key: score_time
|
|
value: [0.01172328 0.01187897 0.0063858 0.01171517 0.0116365 0.01235962
|
|
0.0118258 0.01185846 0.01206326 0.01185799]
|
|
|
|
mean value: 0.011330485343933105
|
|
|
|
key: test_mcc
|
|
value: [0.2 0.81649658 nan 0.40824829 0.6 0.65465367
|
|
0.40824829 0.65465367 0.21821789 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.95555556 0.95650071 0.88910845 0.93356387 0.93356387 0.95555556
|
|
0.88910845 0.91201231 0.93356387 0.91201231]
|
|
|
|
mean value: 0.9270544956323105
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.9 nan 0.7 0.8 0.8 0.7 0.8 0.6 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.97777778 0.97777778 0.94444444 0.96666667 0.96666667 0.97777778
|
|
0.94444444 0.95555556 0.96666667 0.95555556]
|
|
|
|
mean value: 0.9633333333333334
|
|
|
|
key: test_fscore
|
|
value: [0.6 0.90909091 nan 0.66666667 0.8 0.75
|
|
0.66666667 0.83333333 0.66666667 0.90909091]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.97777778 0.97826087 0.94505495 0.96629213 0.96703297 0.97777778
|
|
0.94382022 0.95652174 0.96703297 0.95652174]
|
|
|
|
mean value: 0.9636093142053084
|
|
|
|
key: test_precision
|
|
value: [0.6 0.83333333 nan 0.75 0.8 1.
|
|
0.75 0.71428571 0.57142857 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.97777778 0.95744681 0.93478261 0.97727273 0.95652174 0.97777778
|
|
0.95454545 0.93617021 0.95652174 0.93617021]
|
|
|
|
mean value: 0.9564987058372812
|
|
|
|
key: test_recall
|
|
value: [0.6 1. nan 0.6 0.8 0.6 0.6 1. 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value:/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.97777778 1. 0.95555556 0.95555556 0.97777778 0.97777778
|
|
0.93333333 0.97777778 0.97777778 0.97777778]
|
|
|
|
mean value: 0.9711111111111111
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 0.9 nan 0.7 0.8 0.8 0.7 0.8 0.6 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.97777778 0.97777778 0.94444444 0.96666667 0.96666667 0.97777778
|
|
0.94444444 0.95555556 0.96666667 0.95555556]
|
|
|
|
mean value: 0.9633333333333334
|
|
|
|
key: test_jcc
|
|
value: [0.42857143 0.83333333 nan 0.5 0.66666667 0.6
|
|
0.5 0.71428571 0.5 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.95652174 0.95744681 0.89583333 0.93478261 0.93617021 0.95652174
|
|
0.89361702 0.91666667 0.93617021 0.91666667]
|
|
|
|
mean value: 0.9300397008942337
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.55768061 0.55615211 0.58541512 0.71465158 0.54918408 0.57905293
|
|
0.62343574 0.64894342 0.58266759 0.57590222]
|
|
|
|
mean value: 0.5973085403442383
|
|
|
|
key: score_time
|
|
value: [0.0122087 0.01510215 0.00644422 0.01736808 0.01485038 0.01541257
|
|
0.01190186 0.01582146 0.01725316 0.01302958]
|
|
|
|
mean value: 0.013939213752746583
|
|
|
|
key: test_mcc
|
|
value: [0.2 0.81649658 nan 0.65465367 0.40824829 0.81649658
|
|
0.81649658 0.65465367 0.5 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.95555556 1. 0.95555556 1. 1. 1.
|
|
0.93356387 1. 1. 0.97801929]
|
|
|
|
mean value: 0.9822694276350975
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.9 nan 0.8 0.7 0.9 0.9 0.8 0.7 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.97777778 1. 0.97777778 1. 1. 1.
|
|
0.96666667 1. 1. 0.98888889]
|
|
|
|
mean value: 0.991111111111111
|
|
|
|
key: test_fscore
|
|
value: [0.6 0.90909091 nan 0.83333333 0.72727273 0.88888889
|
|
0.88888889 0.83333333 0.76923077 0.90909091]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.97777778 1. 0.97777778 1. 1. 1.
|
|
0.96629213 1. 1. 0.98901099]
|
|
|
|
mean value: 0.9910858679398006
|
|
|
|
key: test_precision
|
|
value: [0.6 0.83333333 nan 0.71428571 0.66666667 1.
|
|
1. 0.71428571 0.625 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.97777778 1. 0.97777778 1. 1. 1.
|
|
0.97727273 1. 1. 0.97826087]
|
|
|
|
mean value: 0.99110891523935
|
|
|
|
key: test_recall
|
|
value: [0.6 1. nan 1. 0.8 0.8 0.8 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.97777778 1. 0.97777778 1. 1. 1.
|
|
0.95555556 1. 1. 1. ]
|
|
|
|
mean value: 0.991111111111111
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 0.9 nan 0.8 0.7 0.9 0.9 0.8 0.7 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.97777778 1. 0.97777778 1. 1. 1.
|
|
0.96666667 1. 1. 0.98888889]
|
|
|
|
mean value: 0.991111111111111
|
|
|
|
key: test_jcc
|
|
value: [0.42857143 0.83333333 nan 0.71428571 0.57142857 0.8
|
|
0.8 0.71428571 0.625 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.95652174 1. 0.95652174 1. 1. 1.
|
|
0.93478261 1. 1. 0.97826087]
|
|
|
|
mean value: 0.9826086956521739
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01230741 0.01123023 0.00950027 0.00946021 0.00964046 0.00955129
|
|
0.00977182 0.00977874 0.00968146 0.00962782]
|
|
|
|
mean value: 0.010054969787597656
|
|
|
|
key: score_time
|
|
value: [0.01180935 0.00984812 0.00469971 0.00937915 0.00949502 0.00958204
|
|
0.00940156 0.00952315 0.00942564 0.00944281]
|
|
|
|
mean value: 0.00926065444946289
|
|
|
|
key: test_mcc
|
|
value: [-0.21821789 0.33333333 nan 0.2 0.5 -0.21821789
|
|
0.40824829 0. 0.21821789 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.53495589 0.66097134 0.71269665 0.84970583 0.54684459 0.60350985
|
|
0.8230355 0.53031442 0.76486616 0.58456547]
|
|
|
|
mean value: 0.6611465698795331
|
|
|
|
key: test_accuracy
|
|
value: [0.4 0.6 nan 0.6 0.7 0.4 0.7 0.5 0.6 0.7]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.73333333 0.82222222 0.85555556 0.92222222 0.75555556 0.78888889
|
|
0.91111111 0.75555556 0.87777778 0.77777778]
|
|
|
|
mean value: 0.82
|
|
|
|
key: test_fscore
|
|
value: [0.5 0.71428571 nan 0.6 0.76923077 0.5
|
|
0.72727273 0.61538462 0.66666667 0.76923077]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.78571429 0.84 0.85057471 0.92631579 0.79245283 0.81553398
|
|
0.90909091 0.78431373 0.86746988 0.80769231]
|
|
|
|
mean value: 0.8379158420394337
|
|
|
|
key: test_precision
|
|
value: [0.42857143 0.55555556 nan 0.6 0.625 0.42857143
|
|
0.66666667 0.5 0.57142857 0.625 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.65671642 0.76363636 0.88095238 0.88 0.68852459 0.72413793
|
|
0.93023256 0.70175439 0.94736842 0.71186441]
|
|
|
|
mean value: 0.7885187455634349
|
|
|
|
key: test_recall
|
|
value: [0.6 1. nan 0.6 1. 0.6 0.8 0.8 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.97777778 0.93333333 0.82222222 0.97777778 0.93333333 0.93333333
|
|
0.88888889 0.88888889 0.8 0.93333333]
|
|
|
|
mean value: 0.9088888888888889
|
|
|
|
key: test_roc_auc
|
|
value: [0.4 0.6 nan 0.6 0.7 0.4 0.7 0.5 0.6 0.7]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.73333333 0.82222222 0.85555556 0.92222222 0.75555556 0.78888889
|
|
0.91111111 0.75555556 0.87777778 0.77777778]
|
|
|
|
mean value: 0.8200000000000001
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 0.55555556 nan 0.42857143 0.625 0.33333333
|
|
0.57142857 0.44444444 0.5 0.625 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.64705882 0.72413793 0.74 0.8627451 0.65625 0.68852459
|
|
0.83333333 0.64516129 0.76595745 0.67741935]
|
|
|
|
mean value: 0.7240587868070179
|
|
|
|
MCC on Blind test: 0.31
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00987244 0.00975513 0.00979519 0.00990796 0.00982952 0.00979829
|
|
0.00977135 0.00987482 0.00979662 0.01000142]
|
|
|
|
mean value: 0.009840273857116699
|
|
|
|
key: score_time
|
|
value: [0.00938535 0.00935745 0.00481558 0.00939775 0.00938678 0.00941658
|
|
0.00940442 0.00948787 0.00941515 0.00943661]
|
|
|
|
mean value: 0.008950352668762207
|
|
|
|
key: test_mcc
|
|
value: [0.33333333 0.21821789 nan 0. 0.6 0.5
|
|
0.5 0.40824829 0.2 0.6 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.71269665 0.64700558 0.53452248 0.68041382 0.62609903 0.58137767
|
|
0.58137767 0.62609903 0.69509522 0.53452248]
|
|
|
|
mean value: 0.6219209651318979
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.6 nan 0.5 0.8 0.7 0.7 0.7 0.6 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.85555556 0.82222222 0.76666667 0.83333333 0.81111111 0.78888889
|
|
0.78888889 0.81111111 0.84444444 0.76666667]
|
|
|
|
mean value: 0.8088888888888889
|
|
|
|
key: test_fscore
|
|
value: [0.33333333 0.66666667 nan 0.54545455 0.8 0.57142857
|
|
0.57142857 0.72727273 0.6 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.85057471 0.81395349 0.75862069 0.81481481 0.8 0.77647059
|
|
0.77647059 0.8 0.83333333 0.75862069]
|
|
|
|
mean value: 0.7982858904944852
|
|
|
|
key: test_precision
|
|
value: [1. 0.57142857 nan 0.5 0.8 1.
|
|
1. 0.66666667 0.6 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.88095238 0.85365854 0.78571429 0.91666667 0.85 0.825
|
|
0.825 0.85 0.8974359 0.78571429]
|
|
|
|
mean value: 0.8470142053068882
|
|
|
|
key: test_recall
|
|
value: [0.2 0.8 nan 0.6 0.8 0.4 0.4 0.8 0.6 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.82222222 0.77777778 0.73333333 0.73333333 0.75555556 0.73333333
|
|
0.73333333 0.75555556 0.77777778 0.73333333]
|
|
|
|
mean value: 0.7555555555555555
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 0.6 nan 0.5 0.8 0.7 0.7 0.7 0.6 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.85555556 0.82222222 0.76666667 0.83333333 0.81111111 0.78888889
|
|
0.78888889 0.81111111 0.84444444 0.76666667]
|
|
|
|
mean value: 0.8088888888888889
|
|
|
|
key: test_jcc
|
|
value: [0.2 0.5 nan 0.375 0.66666667 0.4
|
|
0.4 0.57142857 0.42857143 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.74 0.68627451 0.61111111 0.6875 0.66666667 0.63461538
|
|
0.63461538 0.66666667 0.71428571 0.61111111]
|
|
|
|
mean value: 0.665284654887596
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00961757 0.00889277 0.009341 0.00934768 0.00927901 0.0093441
|
|
0.00930309 0.00911593 0.00942564 0.00927877]
|
|
|
|
mean value: 0.009294557571411132
|
|
|
|
key: score_time
|
|
value: [0.01543903 0.010463 0.00477624 0.01466155 0.01007581 0.01023936
|
|
0.01033044 0.00999165 0.01001239 0.01013422]
|
|
|
|
mean value: 0.0106123685836792
|
|
|
|
key: test_mcc
|
|
value: [ 0.40824829 0.65465367 nan 0.2 0.81649658 0.
|
|
0.2 0.21821789 -0.21821789 0.2 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.57792049 0.51161666 0.51161666 0.60238451 0.64700558 0.58137767
|
|
0.64508188 0.60059347 0.62237591 0.51161666]
|
|
|
|
mean value: 0.5811589508465446
|
|
|
|
key: test_accuracy
|
|
value: [0.7 0.8 nan 0.6 0.9 0.5 0.6 0.6 0.4 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.78888889 0.75555556 0.75555556 0.8 0.82222222 0.78888889
|
|
0.82222222 0.8 0.81111111 0.75555556]
|
|
|
|
mean value: 0.79
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.75 nan 0.6 0.90909091 0.44444444
|
|
0.6 0.66666667 0.5 0.6 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.79120879 0.75 0.75 0.79069767 0.82978723 0.8
|
|
0.81818182 0.79545455 0.80898876 0.75 ]
|
|
|
|
mean value: 0.7884318827351257
|
|
|
|
key: test_precision
|
|
value: [0.75 1. nan 0.6 0.83333333 0.5
|
|
0.6 0.57142857 0.42857143 0.6 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.7826087 0.76744186 0.76744186 0.82926829 0.79591837 0.76
|
|
0.8372093 0.81395349 0.81818182 0.76744186]
|
|
|
|
mean value: 0.7939465545956881
|
|
|
|
key: test_recall
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[0.6 0.6 nan 0.6 1. 0.4 0.6 0.8 0.6 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.8 0.73333333 0.73333333 0.75555556 0.86666667 0.84444444
|
|
0.8 0.77777778 0.8 0.73333333]
|
|
|
|
mean value: 0.7844444444444445
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 0.8 nan 0.6 0.9 0.5 0.6 0.6 0.4 0.6]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.78888889 0.75555556 0.75555556 0.8 0.82222222 0.78888889
|
|
0.82222222 0.8 0.81111111 0.75555556]
|
|
|
|
mean value: 0.79
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.6 nan 0.42857143 0.83333333 0.28571429
|
|
0.42857143 0.5 0.33333333 0.42857143]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.65454545 0.6 0.6 0.65384615 0.70909091 0.66666667
|
|
0.69230769 0.66037736 0.67924528 0.6 ]
|
|
|
|
mean value: 0.651607951796631
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.57
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00917888 0.00928307 0.0092535 0.00920248 0.00919914 0.00918078
|
|
0.00919771 0.00921726 0.00927758 0.00922322]
|
|
|
|
mean value: 0.009221363067626952
|
|
|
|
key: score_time
|
|
value: [0.00861764 0.0091691 0.0043776 0.00870633 0.008672 0.0087018
|
|
0.00878572 0.00866151 0.00867128 0.008708 ]
|
|
|
|
mean value: 0.008307099342346191
|
|
|
|
key: test_mcc
|
|
value: [0.2 0.6 nan 0.40824829 0.81649658 0.65465367
|
|
0.2 0.21821789 0.21821789 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.84632727 0.80498447 0.82548988 0.87447463 0.82222222 0.80498447
|
|
0.91473203 0.87011096 0.84970583 0.82548988]
|
|
|
|
mean value: 0.8438521657272541
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.8 nan 0.7 0.9 0.8 0.6 0.6 0.6 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.92222222 0.9 0.91111111 0.93333333 0.91111111 0.9
|
|
0.95555556 0.93333333 0.92222222 0.91111111]
|
|
|
|
mean value: 0.92
|
|
|
|
key: test_fscore
|
|
value: [0.6 0.8 nan 0.66666667 0.90909091 0.75
|
|
0.6 0.66666667 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.91954023 0.89411765 0.90697674 0.92857143 0.91111111 0.89411765
|
|
0.95348837 0.93023256 0.91764706 0.90697674]
|
|
|
|
mean value: 0.9162779541113425
|
|
|
|
key: test_precision
|
|
value: [0.6 0.8 nan 0.75 0.83333333 1.
|
|
0.6 0.57142857 0.57142857 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.95238095 0.95 0.95121951 1. 0.91111111 0.95
|
|
1. 0.97560976 0.975 0.95121951]
|
|
|
|
mean value: 0.9616540843979868
|
|
|
|
key: test_recall
|
|
value: [0.6 0.8 nan 0.6 1. 0.6 0.6 0.8 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.88888889 0.84444444 0.86666667 0.86666667 0.91111111 0.84444444
|
|
0.91111111 0.88888889 0.86666667 0.86666667]
|
|
|
|
mean value: 0.8755555555555555
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 0.8 nan 0.7 0.9 0.8 0.6 0.6 0.6 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.92222222 0.9 0.91111111 0.93333333 0.91111111 0.9
|
|
0.95555556 0.93333333 0.92222222 0.91111111]
|
|
|
|
mean value: 0.92
|
|
|
|
key: test_jcc
|
|
value: [0.42857143 0.66666667 nan 0.5 0.83333333 0.6
|
|
0.42857143 0.5 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.85106383 0.80851064 0.82978723 0.86666667 0.83673469 0.80851064
|
|
0.91111111 0.86956522 0.84782609 0.82978723]
|
|
|
|
mean value: 0.845956335047124
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.36695099 0.50412679 0.45356798 0.3968966 0.37350249 0.40489578
|
|
0.58160663 0.40739989 0.36945391 0.39499092]
|
|
|
|
mean value: 0.42533919811248777
|
|
|
|
key: score_time
|
|
value: [0.01202917 0.01205993 0.00681949 0.01205707 0.01209044 0.0119729
|
|
0.01197028 0.0120163 0.01199508 0.01207495]
|
|
|
|
mean value: 0.011508560180664063
|
|
|
|
key: test_mcc
|
|
value: [0. 0.81649658 nan 0.40824829 0.6 0.81649658
|
|
0.81649658 0.5 0.65465367 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.9 nan 0.7 0.8 0.9 0.9 0.7 0.8 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.28571429 0.90909091 nan 0.72727273 0.8 0.88888889
|
|
0.90909091 0.76923077 0.83333333 0.90909091]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5 0.83333333 nan 0.66666667 0.8 1.
|
|
0.83333333 0.625 0.71428571 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.2 1. nan 0.8 0.8 0.8 1. 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.9 nan 0.7 0.8 0.9 0.9 0.7 0.8 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.16666667 0.83333333 nan 0.57142857 0.66666667 0.8
|
|
0.83333333 0.625 0.71428571 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01837254 0.01201224 0.01022887 0.01003718 0.00986743 0.00974107
|
|
0.00972939 0.00980163 0.00968575 0.00952888]
|
|
|
|
mean value: 0.010900497436523438
|
|
|
|
key: score_time
|
|
value: [0.01520014 0.0088861 0.00448728 0.00853777 0.00867915 0.00838041
|
|
0.00833416 0.0084734 0.00840139 0.00852203]
|
|
|
|
mean value: 0.008790183067321777
|
|
|
|
key: test_mcc
|
|
value: [0.6 0.81649658 nan 0.6 0.81649658 0.81649658
|
|
1. 0.21821789 0.65465367 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.9 nan 0.8 0.9 0.9 1. 0.6 0.8 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.90909091 nan 0.8 0.90909091 0.88888889
|
|
1. 0.66666667 0.83333333 0.88888889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.8 0.83333333 nan 0.8 0.83333333 1.
|
|
1. 0.57142857 0.71428571 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8 1. nan 0.8 1. 0.8 1. 0.8 1. 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.9 nan 0.8 0.9 0.9 1. 0.6 0.8 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.83333333 nan 0.66666667 0.83333333 0.8
|
|
1. 0.5 0.71428571 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.89
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08040762 0.08057427 0.08129692 0.08119392 0.08340263 0.08066964
|
|
0.08185148 0.08536839 0.0816021 0.08244801]
|
|
|
|
mean value: 0.08188149929046631
|
|
|
|
key: score_time
|
|
value: [0.01654506 0.01672173 0.0045855 0.01680946 0.0167408 0.01680946
|
|
0.01687789 0.02278209 0.01735115 0.01731062]
|
|
|
|
mean value: 0.01625337600708008
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 0.40824829 nan 0.65465367 0.81649658 0.6
|
|
0.81649658 0.5 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.7 nan 0.8 0.9 0.8 0.9 0.7 0.7 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.72727273 nan 0.75 0.90909091 0.8
|
|
0.90909091 0.76923077 0.76923077 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.66666667 nan 1. 0.83333333 0.8
|
|
0.83333333 0.625 0.625 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.6 0.8 nan 0.6 1. 0.8 1. 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.7 nan 0.8 0.9 0.8 0.9 0.7 0.7 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.57142857 nan 0.6 0.83333333 0.66666667
|
|
0.83333333 0.625 0.625 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00936699 0.00906682 0.00868106 0.00933576 0.00893283 0.00876093
|
|
0.00901222 0.00940871 0.00933743 0.00886393]
|
|
|
|
mean value: 0.009076666831970216
|
|
|
|
key: score_time
|
|
value: [0.00862956 0.00860763 0.00448084 0.00889158 0.00875568 0.00860119
|
|
0.00875163 0.0087719 0.00861621 0.00862575]
|
|
|
|
mean value: 0.00827319622039795
|
|
|
|
key: test_mcc
|
|
value: [0.21821789 0.40824829 nan 0.81649658 0.81649658 0.81649658
|
|
0.65465367 0.40824829 0.65465367 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.7 nan 0.9 0.9 0.9 0.8 0.7 0.8 0.7]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.5 0.72727273 nan 0.90909091 0.88888889 0.88888889
|
|
0.83333333 0.72727273 0.83333333 0.76923077]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.66666667 nan 0.83333333 1. 1.
|
|
0.71428571 0.66666667 0.71428571 0.625 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.4 0.8 nan 1. 0.8 0.8 1. 0.8 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 0.7 nan 0.9 0.9 0.9 0.8 0.7 0.8 0.7]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 0.57142857 nan 0.83333333 0.8 0.8
|
|
0.71428571 0.57142857 0.71428571 0.625 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.04
|
|
|
|
Accuracy on Blind test: 0.48
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.02382302 1.05082822 1.03594112 1.02103114 1.04056716 1.02298737
|
|
1.02116394 1.020087 1.01779222 1.02446771]
|
|
|
|
mean value: 1.027868890762329
|
|
|
|
key: score_time
|
|
value: [0.16805339 0.09164691 0.00471544 0.08566546 0.09267664 0.08607197
|
|
0.09215879 0.09033847 0.09076619 0.08630776]
|
|
|
|
mean value: 0.08884010314941407
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 0.65465367 nan 0.65465367 0.81649658 0.81649658
|
|
1. 0.21821789 0.65465367 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.8 nan 0.8 0.9 0.9 1. 0.6 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.83333333 nan 0.75 0.90909091 0.88888889
|
|
1. 0.66666667 0.83333333 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.71428571 nan 1. 0.83333333 1.
|
|
1. 0.57142857 0.71428571 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.6 1. nan 0.6 1. 0.8 1. 0.8 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.8 nan 0.8 0.9 0.9 1. 0.6 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.71428571 nan 0.6 0.83333333 0.8
|
|
1. 0.5 0.71428571 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.79851103 0.84037805 0.94003201 0.90074062 0.83170342 0.8777082
|
|
0.84478021 0.83035374 0.87192893 0.84235287]
|
|
|
|
mean value: 0.8578489065170288
|
|
|
|
key: score_time
|
|
value: [0.18074512 0.13748908 0.00518966 0.15447927 0.22545338 0.22390962
|
|
0.19519472 0.20300436 0.18351603 0.17219543]
|
|
|
|
mean value: 0.16811766624450683
|
|
|
|
key: test_mcc
|
|
value: [0.6 0.81649658 nan 0.65465367 1. 0.65465367
|
|
0.81649658 0.21821789 0.81649658 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.97801929 1. 1. 1. 0.97801929 0.95650071
|
|
0.97801929 1. 1. 1. ]
|
|
|
|
mean value: 0.9890558596126232
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.9 nan 0.8 1. 0.8 0.9 0.6 0.9 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.98888889 1. 1. 1. 0.98888889 0.97777778
|
|
0.98888889 1. 1. 1. ]
|
|
|
|
mean value: 0.9944444444444445
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.90909091 nan 0.75 1. 0.75
|
|
0.88888889 0.66666667 0.90909091 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.98876404 1. 1. 1. 0.98901099 0.97727273
|
|
0.98876404 1. 1. 1. ]
|
|
|
|
mean value: 0.9943811806171357
|
|
|
|
key: test_precision
|
|
value: [0.8 0.83333333 nan 1. 1. 1.
|
|
1. 0.57142857 0.83333333 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 0.97826087 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9978260869565218
|
|
|
|
key: test_recall
|
|
value: [0.8 1. nan 0.6 1. 0.6 0.8 0.8 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.97777778 1. 1. 1. 1. 0.95555556
|
|
0.97777778 1. 1. 1. ]
|
|
|
|
mean value: 0.991111111111111
|
|
|
|
key: test_roc_auc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[0.8 0.9 nan 0.8 1. 0.8 0.9 0.6 0.9 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.98888889 1. 1. 1. 0.98888889 0.97777778
|
|
0.98888889 1. 1. 1. ]
|
|
|
|
mean value: 0.9944444444444445
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.83333333 nan 0.6 1. 0.6
|
|
0.8 0.5 0.83333333 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.97777778 1. 1. 1. 0.97826087 0.95555556
|
|
0.97777778 1. 1. 1. ]
|
|
|
|
mean value: 0.9889371980676328
|
|
|
|
MCC on Blind test: 0.6
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02122736 0.00870228 0.00875449 0.00875592 0.00878906 0.00892305
|
|
0.00867987 0.00895619 0.00886059 0.00871706]
|
|
|
|
mean value: 0.010036587715148926
|
|
|
|
key: score_time
|
|
value: [0.01413703 0.0085597 0.00445867 0.00852251 0.00865173 0.00862718
|
|
0.00861049 0.0086062 0.00850129 0.00863886]
|
|
|
|
mean value: 0.008731365203857422
|
|
|
|
key: test_mcc
|
|
value: [0.33333333 0.21821789 nan 0. 0.6 0.5
|
|
0.5 0.40824829 0.2 0.6 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.71269665 0.64700558 0.53452248 0.68041382 0.62609903 0.58137767
|
|
0.58137767 0.62609903 0.69509522 0.53452248]
|
|
|
|
mean value: 0.6219209651318979
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.6 nan 0.5 0.8 0.7 0.7 0.7 0.6 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.85555556 0.82222222 0.76666667 0.83333333 0.81111111 0.78888889
|
|
0.78888889 0.81111111 0.84444444 0.76666667]
|
|
|
|
mean value: 0.8088888888888889
|
|
|
|
key: test_fscore
|
|
value: [0.33333333 0.66666667 nan 0.54545455 0.8 0.57142857
|
|
0.57142857 0.72727273 0.6 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.85057471 0.81395349 0.75862069 0.81481481 0.8 0.77647059
|
|
0.77647059 0.8 0.83333333 0.75862069]
|
|
|
|
mean value: 0.7982858904944852
|
|
|
|
key: test_precision
|
|
value: [1. 0.57142857 nan 0.5 0.8 1.
|
|
1. 0.66666667 0.6 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.88095238 0.85365854 0.78571429 0.91666667 0.85 0.825
|
|
0.825 0.85 0.8974359 0.78571429]
|
|
|
|
mean value: 0.8470142053068882
|
|
|
|
key: test_recall
|
|
value: [0.2 0.8 nan 0.6 0.8 0.4 0.4 0.8 0.6 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.82222222 0.77777778 0.73333333 0.73333333 0.75555556 0.73333333
|
|
0.73333333 0.75555556 0.77777778 0.73333333]
|
|
|
|
mean value: 0.7555555555555555
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 0.6 nan 0.5 0.8 0.7 0.7 0.7 0.6 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.85555556 0.82222222 0.76666667 0.83333333 0.81111111 0.78888889
|
|
0.78888889 0.81111111 0.84444444 0.76666667]
|
|
|
|
mean value: 0.8088888888888889
|
|
|
|
key: test_jcc
|
|
value: [0.2 0.5 nan 0.375 0.66666667 0.4
|
|
0.4 0.57142857 0.42857143 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.74 0.68627451 0.61111111 0.6875 0.66666667 0.63461538
|
|
0.63461538 0.66666667 0.71428571 0.61111111]
|
|
|
|
mean value: 0.665284654887596
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.06223869 0.05609274 0.04758739 0.18425703 0.03499866 0.03480005
|
|
0.05428028 0.03800774 0.06300688 0.03989649]
|
|
|
|
mean value: 0.06151659488677978
|
|
|
|
key: score_time
|
|
value: [0.01010013 0.0104847 0.00464034 0.01078033 0.01093912 0.01051426
|
|
0.01065564 0.0102036 0.01068449 0.0109179 ]
|
|
|
|
mean value: 0.009992051124572753
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.81649658 nan 1. 1. 0.81649658
|
|
1. 0.6 0.65465367 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.9 nan 1. 1. 0.9 1. 0.8 0.8 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 0.90909091 nan 1. 1. 0.90909091
|
|
1. 0.8 0.83333333 0.88888889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.83333333 nan 1. 1. 0.83333333
|
|
1. 0.8 0.71428571 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8 1. nan 1. 1. 1. 1. 0.8 1. 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.9 nan 1. 1. 0.9 1. 0.8 0.8 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.8 0.83333333 nan 1. 1. 0.83333333
|
|
1. 0.66666667 0.71428571 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.89
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.0236156 0.04939389 0.06203413 0.03593135 0.03797984 0.04009914
|
|
0.04238033 0.03859186 0.03845286 0.04021764]
|
|
|
|
mean value: 0.04086966514587402
|
|
|
|
key: score_time
|
|
value: [0.02123761 0.0344398 0.0061059 0.0222311 0.02070427 0.02340961
|
|
0.01904702 0.02245331 0.02260351 0.02361083]
|
|
|
|
mean value: 0.021584296226501466
|
|
|
|
key: test_mcc
|
|
value: [0. 0.81649658 nan 0.81649658 1. 0.40824829
|
|
0.40824829 0.65465367 0.5 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 0.97801929
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9978019293843652
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.9 nan 0.9 1. 0.7 0.7 0.8 0.7 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 0.98888889
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9988888888888889
|
|
|
|
key: test_fscore
|
|
value: [0.44444444 0.90909091 nan 0.90909091 1. 0.72727273
|
|
0.72727273 0.83333333 0.76923077 0.90909091]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 0.98876404
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.998876404494382
|
|
|
|
key: test_precision
|
|
value: [0.5 0.83333333 nan 0.83333333 1. 0.66666667
|
|
0.66666667 0.71428571 0.625 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.4 1. nan 1. 1. 0.8 0.8 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 0.97777778
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9977777777777778
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.9 nan 0.9 1. 0.7 0.7 0.8 0.7 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 0.98888889
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9988888888888889
|
|
|
|
key: test_jcc
|
|
value: [0.28571429 0.83333333 nan 0.83333333 1. 0.57142857
|
|
0.57142857 0.71428571 0.625 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 0.97777778
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9977777777777778
|
|
|
|
MCC on Blind test: 0.01
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01126742 0.00883031 0.00877166 0.00853825 0.00856185 0.00841284
|
|
0.00843716 0.0084455 0.0083921 0.00846934]
|
|
|
|
mean value: 0.008812642097473145
|
|
|
|
key: score_time
|
|
value: [0.00877571 0.0087781 0.00430918 0.0083077 0.00839949 0.00832129
|
|
0.00838757 0.0083096 0.00846934 0.00847459]
|
|
|
|
mean value: 0.008053255081176759
|
|
|
|
key: test_mcc
|
|
value: [0. 0.2 nan 0.2 0.81649658 0.65465367
|
|
0.21821789 0. 0. 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.58137767 0.57792049 0.56056066 0.66683134 0.6 0.56056066
|
|
0.64700558 0.66683134 0.53990552 0.53665631]
|
|
|
|
mean value: 0.5937649587083046
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.6 nan 0.6 0.9 0.8 0.6 0.5 0.5 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.78888889 0.78888889 0.77777778 0.83333333 0.8 0.77777778
|
|
0.82222222 0.83333333 0.76666667 0.76666667]
|
|
|
|
mean value: 0.7955555555555556
|
|
|
|
key: test_fscore
|
|
value: [0.54545455 0.6 nan 0.6 0.90909091 0.75
|
|
0.5 0.44444444 0.61538462 0.90909091]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.77647059 0.78651685 0.76190476 0.83146067 0.8 0.76190476
|
|
0.81395349 0.83146067 0.74698795 0.75294118]
|
|
|
|
mean value: 0.7863600930941919
|
|
|
|
key: test_precision
|
|
value: [0.5 0.6 nan 0.6 0.83333333 1.
|
|
0.66666667 0.5 0.5 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.825 0.79545455 0.82051282 0.84090909 0.8 0.82051282
|
|
0.85365854 0.84090909 0.81578947 0.8 ]
|
|
|
|
mean value: 0.8212746378567944
|
|
|
|
key: test_recall
|
|
value: [0.6 0.6 nan 0.6 1. 0.6 0.4 0.4 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[0.73333333 0.77777778 0.71111111 0.82222222 0.8 0.71111111
|
|
0.77777778 0.82222222 0.68888889 0.71111111]
|
|
|
|
mean value: 0.7555555555555555
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.6 nan 0.6 0.9 0.8 0.6 0.5 0.5 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.78888889 0.78888889 0.77777778 0.83333333 0.8 0.77777778
|
|
0.82222222 0.83333333 0.76666667 0.76666667]
|
|
|
|
mean value: 0.7955555555555556
|
|
|
|
key: test_jcc
|
|
value: [0.375 0.42857143 nan 0.42857143 0.83333333 0.6
|
|
0.33333333 0.28571429 0.44444444 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.63461538 0.64814815 0.61538462 0.71153846 0.66666667 0.61538462
|
|
0.68627451 0.71153846 0.59615385 0.60377358]
|
|
|
|
mean value: 0.6489478294139781
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00969601 0.01305962 0.01228189 0.01330137 0.01364422 0.01439548
|
|
0.01438236 0.01451087 0.01334405 0.01380467]
|
|
|
|
mean value: 0.013242053985595702
|
|
|
|
key: score_time
|
|
value: [0.0084312 0.01083541 0.0055151 0.01124859 0.01355529 0.0133152
|
|
0.03446317 0.01134872 0.01132798 0.01131558]
|
|
|
|
mean value: 0.013135623931884766
|
|
|
|
key: test_mcc
|
|
value: [0. 0.65465367 nan 0.6 0.6 0.81649658
|
|
0.65465367 0.81649658 0.33333333 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.95555556 0.91473203 0.91111111 0.95555556 0.95650071 0.95650071
|
|
0.81649658 0.87447463 0.79772404 0.85485041]
|
|
|
|
mean value: 0.8993501347937264
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.8 nan 0.8 0.8 0.9 0.8 0.9 0.6 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.97777778 0.95555556 0.95555556 0.97777778 0.97777778 0.97777778
|
|
0.9 0.93333333 0.88888889 0.92222222]
|
|
|
|
mean value: 0.9466666666666667
|
|
|
|
key: test_fscore
|
|
value: [0.28571429 0.83333333 nan 0.8 0.8 0.88888889
|
|
0.75 0.88888889 0.71428571 0.88888889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.97777778 0.95744681 0.95555556 0.97777778 0.97826087 0.97826087
|
|
0.88888889 0.92857143 0.9 0.91566265]
|
|
|
|
mean value: 0.9458202626814911
|
|
|
|
key: test_precision
|
|
value: [0.5 0.71428571 nan 0.8 0.8 1.
|
|
1. 1. 0.55555556 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.97777778 0.91836735 0.95555556 0.97777778 0.95744681 0.95744681
|
|
1. 1. 0.81818182 1. ]
|
|
|
|
mean value: 0.9562553893252982
|
|
|
|
key: test_recall
|
|
value: [0.2 1. nan 0.8 0.8 0.8 0.6 0.8 1. 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.97777778 1. 0.95555556 0.97777778 1. 1.
|
|
0.8 0.86666667 1. 0.84444444]
|
|
|
|
mean value: 0.9422222222222222
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.8 nan 0.8 0.8 0.9 0.8 0.9 0.6 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.97777778 0.95555556 0.95555556 0.97777778 0.97777778 0.97777778
|
|
0.9 0.93333333 0.88888889 0.92222222]
|
|
|
|
mean value: 0.9466666666666667
|
|
|
|
key: test_jcc
|
|
value: [0.16666667 0.71428571 nan 0.66666667 0.66666667 0.8
|
|
0.6 0.8 0.55555556 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.95652174 0.91836735 0.91489362 0.95652174 0.95744681 0.95744681
|
|
0.8 0.86666667 0.81818182 0.84444444]
|
|
|
|
mean value: 0.8990490988535128
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01253462 0.01219559 0.01248789 0.01262569 0.01222777 0.01228476
|
|
0.01211166 0.01249099 0.01256251 0.01269126]
|
|
|
|
mean value: 0.012421274185180664
|
|
|
|
key: score_time
|
|
value: [0.01041532 0.01126981 0.00603032 0.01128006 0.01135278 0.01132679
|
|
0.01124406 0.01126432 0.01123857 0.0112381 ]
|
|
|
|
mean value: 0.01066601276397705
|
|
|
|
key: test_mcc
|
|
value: [0. 0.5 nan 0.6 0.6 0.65465367
|
|
0.40824829 0.6 0.6 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.95555556 0.74278135 0.97801929 0.89442719 0.97801929 0.46499055
|
|
0.81649658 0.77919372 0.91473203 1. ]
|
|
|
|
mean value: 0.8524215579246943
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.7 nan 0.8 0.8 0.8 0.7 0.8 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.97777778 0.85555556 0.98888889 0.94444444 0.98888889 0.67777778
|
|
0.9 0.87777778 0.95555556 1. ]
|
|
|
|
mean value: 0.9166666666666666
|
|
|
|
key: test_fscore
|
|
value: [0.44444444 0.76923077 nan 0.8 0.8 0.83333333
|
|
0.66666667 0.8 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.97777778 0.87378641 0.98876404 0.94736842 0.98876404 0.75630252
|
|
0.88888889 0.86075949 0.95348837 1. ]
|
|
|
|
mean value: 0.9235899972146242
|
|
|
|
key: test_precision
|
|
value: [0.5 0.625 nan 0.8 0.8 0.71428571
|
|
0.75 0.8 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.97777778 0.77586207 1. 0.9 1. 0.60810811
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9261747954851403
|
|
|
|
key: test_recall
|
|
value: [0.4 1. nan 0.8 0.8 1. 0.6 0.8 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.97777778 1. 0.97777778 1. 0.97777778 1.
|
|
0.8 0.75555556 0.91111111 1. ]
|
|
|
|
mean value: 0.94
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.7 nan 0.8 0.8 0.8 0.7 0.8 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.97777778 0.85555556 0.98888889 0.94444444 0.98888889 0.67777778
|
|
0.9 0.87777778 0.95555556 1. ]
|
|
|
|
mean value: 0.9166666666666666
|
|
|
|
key: test_jcc
|
|
value: [0.28571429 0.625 nan 0.66666667 0.66666667 0.71428571
|
|
0.5 0.66666667 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.95652174 0.77586207 0.97777778 0.9 0.97777778 0.60810811
|
|
0.8 0.75555556 0.91111111 1. ]
|
|
|
|
mean value: 0.8662714138426282
|
|
|
|
MCC on Blind test: 0.49
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0932951 0.08622575 0.08703232 0.08533549 0.08312297 0.08790827
|
|
0.08446789 0.08704805 0.08473301 0.08655405]
|
|
|
|
mean value: 0.08657228946685791
|
|
|
|
key: score_time
|
|
value: [0.01610136 0.0159595 0.00456142 0.01449895 0.01445603 0.01618457
|
|
0.01588774 0.01573229 0.01589179 0.01581264]
|
|
|
|
mean value: 0.014508628845214843
|
|
|
|
key: test_mcc
|
|
value: [0.6 0.81649658 nan 0.65465367 1. 0.6
|
|
1. 1. 0.65465367 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.9 nan 0.8 1. 0.8 1. 1. 0.8 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.90909091 nan 0.75 1. 0.8
|
|
1. 1. 0.83333333 0.88888889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.8 0.83333333 nan 1. 1. 0.8
|
|
1. 1. 0.71428571 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8 1. nan 0.6 1. 0.8 1. 1. 1. 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.9 nan 0.8 1. 0.8 1. 1. 0.8 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.83333333 nan 0.6 1. 0.66666667
|
|
1. 1. 0.71428571 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.78
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03166676 0.04806876 0.04904318 0.03325343 0.03756118 0.02605057
|
|
0.051754 0.03409243 0.02555013 0.03323889]
|
|
|
|
mean value: 0.037027931213378905
|
|
|
|
key: score_time
|
|
value: [0.02363086 0.02686834 0.00466347 0.02329421 0.02710223 0.02057576
|
|
0.02632928 0.01891589 0.01708174 0.02512145]
|
|
|
|
mean value: 0.021358323097229005
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.81649658 nan 0.81649658 1. 0.6
|
|
1. 0.65465367 0.81649658 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.97801929 0.97801929 1. 1. 1. 1.
|
|
0.97801929 1. 1. 1. ]
|
|
|
|
mean value: 0.9934057881530954
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.9 nan 0.9 1. 0.8 1. 0.8 0.9 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.98888889 0.98888889 1. 1. 1. 1.
|
|
0.98888889 1. 1. 1. ]
|
|
|
|
mean value: 0.9966666666666667
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 0.90909091 nan 0.90909091 1. 0.8
|
|
1. 0.83333333 0.90909091 0.88888889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.98876404 0.98876404 1. 1. 1. 1.
|
|
0.98876404 1. 1. 1. ]
|
|
|
|
mean value: 0.996629213483146
|
|
|
|
key: test_precision
|
|
value: [1. 0.83333333 nan 0.83333333 1. 0.8
|
|
1. 0.71428571 0.83333333 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8 1. nan 1. 1. 0.8 1. 1. 1. 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.97777778 0.97777778 1. 1. 1. 1.
|
|
0.97777778 1. 1. 1. ]
|
|
|
|
mean value: 0.9933333333333333
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.9 nan 0.9 1. 0.8 1. 0.8 0.9 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.98888889 0.98888889 1. 1. 1. 1.
|
|
0.98888889 1. 1. 1. ]
|
|
|
|
mean value: 0.9966666666666667
|
|
|
|
key: test_jcc
|
|
value: [0.8 0.83333333 nan 0.83333333 1. 0.66666667
|
|
1. 0.71428571 0.83333333 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[0.97777778 0.97777778 1. 1. 1. 1.
|
|
0.97777778 1. 1. 1. ]
|
|
|
|
mean value: 0.9933333333333333
|
|
|
|
MCC on Blind test: 0.95
|
|
|
|
Accuracy on Blind test: 0.98
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01846266 0.01602125 0.01668453 0.02254128 0.01823306 0.01637578
|
|
0.01689363 0.01705527 0.01624489 0.01630282]
|
|
|
|
mean value: 0.017481517791748048
|
|
|
|
key: score_time
|
|
value: [0.01143336 0.01123261 0.00608587 0.01233387 0.01204014 0.01180053
|
|
0.011935 0.01179743 0.01177883 0.01175404]
|
|
|
|
mean value: 0.011219167709350586
|
|
|
|
key: test_mcc
|
|
value: [0. 0.6 nan 0.2 0.81649658 0.6
|
|
0.65465367 0.5 0.33333333 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.8 nan 0.6 0.9 0.8 0.8 0.7 0.6 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.28571429 0.8 nan 0.6 0.90909091 0.8
|
|
0.83333333 0.76923077 0.71428571 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5 0.8 nan 0.6 0.83333333 0.8
|
|
0.71428571 0.625 0.55555556 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.2 0.8 nan 0.6 1. 0.8 1. 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.8 nan 0.6 0.9 0.8 0.8 0.7 0.6 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.16666667 0.66666667 nan 0.42857143 0.83333333 0.66666667
|
|
0.71428571 0.625 0.55555556 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.17908907 0.17281628 0.17472339 0.16933966 0.17832685 0.17855334
|
|
0.17665958 0.20065045 0.17976284 0.14105153]
|
|
|
|
mean value: 0.17509729862213136
|
|
|
|
key: score_time
|
|
value: [0.00950193 0.00909305 0.00476193 0.00946689 0.00997877 0.0098536
|
|
0.00984955 0.00980544 0.00993419 0.00911093]
|
|
|
|
mean value: 0.009135627746582031
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.81649658 nan 0.81649658 1. 0.6
|
|
1. 1. 0.81649658 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.9 nan 0.9 1. 0.8 1. 1. 0.9 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 0.90909091 nan 0.90909091 1. 0.8
|
|
1. 1. 0.90909091 0.88888889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.83333333 nan 0.83333333 1. 0.8
|
|
1. 1. 0.83333333 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8 1. nan 1. 1. 0.8 1. 1. 1. 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.9 nan 0.9 1. 0.8 1. 1. 0.9 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.8 0.83333333 nan 0.83333333 1. 0.66666667
|
|
1. 1. 0.83333333 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.95
|
|
|
|
Accuracy on Blind test: 0.98
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01081228 0.01397157 0.01533651 0.01416707 0.01431513 0.01669645
|
|
0.0199163 0.01420426 0.01628375 0.01877785]
|
|
|
|
mean value: 0.01544811725616455
|
|
|
|
key: score_time
|
|
value: [0.01137567 0.01164627 0.00611782 0.01310349 0.01270056 0.01162291
|
|
0.01324058 0.01273084 0.01166701 0.01337457]
|
|
|
|
mean value: 0.011757969856262207
|
|
|
|
key: test_mcc
|
|
value: [0.33333333 0.81649658 nan 0.5 0.81649658 0.81649658
|
|
0.81649658 0.65465367 1. 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.9 nan 0.7 0.9 0.9 0.9 0.8 1. 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.33333333 0.88888889 nan 0.57142857 0.88888889 0.88888889
|
|
0.88888889 0.75 1. 0.88888889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [ 1. 1. nan 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.2 0.8 nan 0.4 0.8 0.8 0.8 0.6 1. 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 0.9 nan 0.7 0.9 0.9 0.9 0.8 1. 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.2 0.8 nan 0.4 0.8 0.8 0.8 0.6 1. 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03344846 0.03200626 0.032161 0.03211999 0.0323348 0.03212428
|
|
0.03187537 0.03182149 0.0317018 0.03172779]
|
|
|
|
mean value: 0.03213212490081787
|
|
|
|
key: score_time
|
|
value: [0.01959944 0.01622725 0.01191449 0.02088118 0.0222888 0.01162863
|
|
0.02132607 0.01181674 0.02234721 0.02092266]
|
|
|
|
mean value: 0.01789524555206299
|
|
|
|
key: test_mcc
|
|
value: [0.2 0.65465367 nan 0.65465367 0.81649658 0.81649658
|
|
0.81649658 0.65465367 0.65465367 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 0.97801929 0.97801929 0.97801929 0.97801929 1.
|
|
0.97801929 0.97801929 1. 0.97801929]
|
|
|
|
mean value: 0.9846135056905561
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.8 nan 0.8 0.9 0.9 0.9 0.8 0.8 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.98888889 0.98888889 0.98888889 0.98888889 1.
|
|
0.98888889 0.98888889 1. 0.98888889]
|
|
|
|
mean value: 0.9922222222222222
|
|
|
|
key: test_fscore
|
|
value: [0.6 0.83333333 nan 0.83333333 0.90909091 0.88888889
|
|
0.90909091 0.83333333 0.83333333 0.88888889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 0.98901099 0.98901099 0.98901099 0.98901099 1.
|
|
0.98901099 0.98901099 1. 0.98901099]
|
|
|
|
mean value: 0.9923076923076923
|
|
|
|
key: test_precision
|
|
value: [0.6 0.71428571 nan 0.71428571 0.83333333 1.
|
|
0.83333333 0.71428571 0.71428571 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 0.97826087 0.97826087 0.97826087 0.97826087 1.
|
|
0.97826087 0.97826087 1. 0.97826087]
|
|
|
|
mean value: 0.9847826086956522
|
|
|
|
key: test_recall
|
|
value: [0.6 1. nan 1. 1. 0.8 1. 1. 1. 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 0.8 nan 0.8 0.9 0.9 0.9 0.8 0.8 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.98888889 0.98888889 0.98888889 0.98888889 1.
|
|
0.98888889 0.98888889 1. 0.98888889]
|
|
|
|
mean value: 0.9922222222222221
|
|
|
|
key: test_jcc
|
|
value: [0.42857143 0.71428571 nan 0.71428571 0.83333333 0.8
|
|
0.83333333 0.71428571 0.71428571 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 0.97826087 0.97826087 0.97826087 0.97826087 1.
|
|
0.97826087 0.97826087 1. 0.97826087]
|
|
|
|
mean value: 0.9847826086956522
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:148: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:151: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.21318793 0.09895921 0.21072364 0.19039989 0.18606758 0.18941188
|
|
0.18936801 0.20299673 0.19019341 0.22260165]
|
|
|
|
mean value: 0.18939099311828614
|
|
|
|
key: score_time
|
|
value: [0.02227712 0.01179457 0.01261592 0.02156162 0.02067518 0.02020216
|
|
0.02313948 0.0224812 0.02181292 0.02340198]
|
|
|
|
mean value: 0.019996213912963866
|
|
|
|
key: test_mcc
|
|
value: [0.2 0.65465367 nan 0.65465367 0.81649658 0.81649658
|
|
0.81649658 0.65465367 0.65465367 0.81649658]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 0.97801929 0.97801929 0.97801929 0.97801929 1.
|
|
0.97801929 0.97801929 1. 0.97801929]
|
|
|
|
mean value: 0.9846135056905561
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.8 nan 0.8 0.9 0.9 0.9 0.8 0.8 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.98888889 0.98888889 0.98888889 0.98888889 1.
|
|
0.98888889 0.98888889 1. 0.98888889]
|
|
|
|
mean value: 0.9922222222222222
|
|
|
|
key: test_fscore
|
|
value: [0.6 0.83333333 nan 0.83333333 0.90909091 0.88888889
|
|
0.90909091 0.83333333 0.83333333 0.88888889]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 0.98901099 0.98901099 0.98901099 0.98901099 1.
|
|
0.98901099 0.98901099 1. 0.98901099]
|
|
|
|
mean value: 0.9923076923076923
|
|
|
|
key: test_precision
|
|
value: [0.6 0.71428571 nan 0.71428571 0.83333333 1.
|
|
0.83333333 0.71428571 0.71428571 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 0.97826087 0.97826087 0.97826087 0.97826087 1.
|
|
0.97826087 0.97826087 1. 0.97826087]
|
|
|
|
mean value: 0.9847826086956522
|
|
|
|
key: test_recall
|
|
value: [0.6 1. nan 1. 1. 0.8 1. 1. 1. 0.8]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 0.8 nan 0.8 0.9 0.9 0.9 0.8 0.8 0.9]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.98888889 0.98888889 0.98888889 0.98888889 1.
|
|
0.98888889 0.98888889 1. 0.98888889]
|
|
|
|
mean value: 0.9922222222222221
|
|
|
|
key: test_jcc
|
|
value: [0.42857143 0.71428571 nan 0.71428571 0.83333333 0.8
|
|
0.83333333 0.71428571 0.71428571 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 0.97826087 0.97826087 0.97826087 0.97826087 1.
|
|
0.97826087 0.97826087 1. 0.97826087]
|
|
|
|
mean value: 0.9847826086956522
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0250783 0.03367138 0.02173638 0.03091216 0.0839653 0.04441166
|
|
0.03470874 0.0207479 0.02312613 0.02219653]
|
|
|
|
mean value: 0.034055447578430174
|
|
|
|
key: score_time
|
|
value: [0.01159692 0.01183295 0.01154208 0.0117619 0.01320601 0.00625491
|
|
0.01170278 0.01160264 0.0115304 0.01158214]
|
|
|
|
mean value: 0.011261272430419921
|
|
|
|
key: test_mcc
|
|
value: [ 0.33333333 0.70710678 0.4472136 1. nan nan
|
|
-0.33333333 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.96225045 1. 1. 1. 0.96225045 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9924500897298753
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.83333333 0.66666667 1. nan nan
|
|
0.33333333 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.98076923 1. 1. 1. 0.98076923 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9961538461538462
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.85714286 0.75 1. nan nan
|
|
0.33333333 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.98039216 1. 1. 1. 0.98039216 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.996078431372549
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.75 0.6 1. nan nan
|
|
0.33333333 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.66666667 1. 1. 1. nan nan
|
|
0.33333333 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.96153846 1. 1. 1. 0.96153846 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9923076923076923
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.83333333 0.66666667 1. nan nan
|
|
0.33333333 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.98076923 1. 1. 1. 0.98076923 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9961538461538462
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.75 0.6 1. nan nan 0.2 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.96153846 1. 1. 1. 0.96153846 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9923076923076923
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.27737689 0.28079844 0.29659295 0.26077461 0.26526332 0.26656747
|
|
0.26323867 0.28429699 0.31050968 0.30310488]
|
|
|
|
mean value: 0.28085238933563234
|
|
|
|
key: score_time
|
|
value: [0.01181126 0.01163769 0.01170397 0.0118041 0.00647473 0.00621986
|
|
0.01164699 0.01164746 0.0117209 0.01170206]
|
|
|
|
mean value: 0.01063690185546875
|
|
|
|
key: test_mcc
|
|
value: [0.33333333 0.70710678 0.4472136 1. nan nan
|
|
0.70710678 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.83333333 0.66666667 1. nan nan
|
|
0.83333333 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.85714286 0.75 1. nan nan
|
|
0.8 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.75 0.6 1. nan nan
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 1. 1. 1. nan nan
|
|
0.66666667 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.83333333 0.66666667 1. nan nan
|
|
0.83333333 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.75 0.6 1. nan nan
|
|
0.66666667 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01206636 0.0112257 0.00846434 0.0084424 0.01119018 0.01600456
|
|
0.0084219 0.00850463 0.00825167 0.00816083]
|
|
|
|
mean value: 0.010073256492614747
|
|
|
|
key: score_time
|
|
value: [0.01196933 0.00882673 0.00861526 0.01163769 0.00628543 0.00520444
|
|
0.00848961 0.00851965 0.00831652 0.00836253]
|
|
|
|
mean value: 0.00862271785736084
|
|
|
|
key: test_mcc
|
|
value: [-0.4472136 0.33333333 0.33333333 0.70710678 nan nan
|
|
-0.4472136 0. 0.16666667 -0.16666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.85634884 0.80829038 0.84866842 0.89056356 0.70064905 0.82305489
|
|
0.77151675 0.77151675 0.53088871 0.81196581]
|
|
|
|
mean value: 0.7813463156135712
|
|
|
|
key: test_accuracy
|
|
value: [0.33333333 0.66666667 0.66666667 0.83333333 nan nan
|
|
0.33333333 0.5 0.6 0.4 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.92307692 0.90384615 0.92307692 0.94230769 0.84615385 0.90384615
|
|
0.88461538 0.88461538 0.71698113 0.90566038]
|
|
|
|
mean value: 0.8834179970972423
|
|
|
|
key: test_fscore
|
|
value: [0. 0.66666667 0.66666667 0.8 nan nan
|
|
0.5 0.4 0.5 0.4 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.91666667 0.90196078 0.92 0.93877551 0.83333333 0.9122807
|
|
0.88 0.88 0.61538462 0.90566038]
|
|
|
|
mean value: 0.8704061989015298
|
|
|
|
key: test_precision
|
|
value: [0. 0.66666667 0.66666667 1. nan nan
|
|
0.4 0.5 0.5 0.5 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 0.92 0.95833333 1. 0.90909091 0.83870968
|
|
0.91666667 0.91666667 1. 0.88888889]
|
|
|
|
mean value: 0.9348356142065819
|
|
|
|
key: test_recall
|
|
value: [0. 0.66666667 0.66666667 0.66666667 nan nan
|
|
0.66666667 0.33333333 0.5 0.33333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.84615385 0.88461538 0.88461538 0.88461538 0.76923077 1.
|
|
0.84615385 0.84615385 0.44444444 0.92307692]
|
|
|
|
mean value: 0.832905982905983
|
|
|
|
key: test_roc_auc
|
|
value: [0.33333333 0.66666667 0.66666667 0.83333333 nan nan
|
|
0.33333333 0.5 0.58333333 0.41666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.92307692 0.90384615 0.92307692 0.94230769 0.84615385 0.90384615
|
|
0.88461538 0.88461538 0.72222222 0.90598291]
|
|
|
|
mean value: 0.883974358974359
|
|
|
|
key: test_jcc
|
|
value: [0. 0.5 0.5 0.66666667 nan nan
|
|
0.33333333 0.25 0.33333333 0.25 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.84615385 0.82142857 0.85185185 0.88461538 0.71428571 0.83870968
|
|
0.78571429 0.78571429 0.44444444 0.82758621]
|
|
|
|
mean value: 0.7800504268524291
|
|
|
|
MCC on Blind test: -0.07
|
|
|
|
Accuracy on Blind test: 0.48
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00878549 0.00863194 0.00832844 0.00831485 0.00832462 0.00855541
|
|
0.00872874 0.00837517 0.00835276 0.00829864]
|
|
|
|
mean value: 0.008469605445861816
|
|
|
|
key: score_time
|
|
value: [0.00858736 0.00840878 0.00837111 0.00836158 0.00419855 0.00429416
|
|
0.00853777 0.00835299 0.00840139 0.00841165]
|
|
|
|
mean value: 0.007592535018920899
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0. 0.33333333 0. nan nan
|
|
0.70710678 0.70710678 -0.40824829 0.61237244]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.69436507 0.71151247 0.70064905 0.77849894 0.77151675 0.81312325
|
|
0.80829038 0.77151675 0.71778392 0.77540056]
|
|
|
|
mean value: 0.7542657149596319
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.5 0.66666667 0.5 nan nan
|
|
0.83333333 0.83333333 0.4 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.84615385 0.84615385 0.84615385 0.88461538 0.88461538 0.90384615
|
|
0.90384615 0.88461538 0.8490566 0.88679245]
|
|
|
|
mean value: 0.8735849056603774
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.4 0.66666667 0.4 nan nan
|
|
0.85714286 0.8 0. 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.84 0.82608696 0.83333333 0.875 0.88888889 0.89795918
|
|
0.90196078 0.88 0.83333333 0.88 ]
|
|
|
|
mean value: 0.865656248006449
|
|
|
|
key: test_precision
|
|
value: [0.5 0.5 0.66666667 0.5 nan nan
|
|
0.75 1. 0. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.875 0.95 0.90909091 0.95454545 0.85714286 0.95652174
|
|
0.92 0.91666667 0.95238095 0.91666667]
|
|
|
|
mean value: 0.9208015245623942
|
|
|
|
key: test_recall
|
|
value: [0.33333333 0.33333333 0.66666667 0.33333333 nan nan
|
|
1. 0.66666667 0. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.80769231 0.73076923 0.76923077 0.80769231 0.92307692 0.84615385
|
|
0.88461538 0.84615385 0.74074074 0.84615385]
|
|
|
|
mean value: 0.8202279202279202
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.5 0.66666667 0.5 nan nan
|
|
0.83333333 0.83333333 0.33333333 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.84615385 0.84615385 0.84615385 0.88461538 0.88461538 0.90384615
|
|
0.90384615 0.88461538 0.8511396 0.88603989]
|
|
|
|
mean value: 0.8737179487179488
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.25 0.5 0.25 nan nan
|
|
0.75 0.66666667 0. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.72413793 0.7037037 0.71428571 0.77777778 0.8 0.81481481
|
|
0.82142857 0.78571429 0.71428571 0.78571429]
|
|
|
|
mean value: 0.764186279875935
|
|
|
|
MCC on Blind test: -0.03
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00827241 0.00880098 0.00887871 0.00861597 0.00911736 0.00877094
|
|
0.00847507 0.00913715 0.00888658 0.00889826]
|
|
|
|
mean value: 0.008785343170166016
|
|
|
|
key: score_time
|
|
value: [0.00981283 0.0094254 0.00966692 0.00966859 0.0045979 0.00435901
|
|
0.00986171 0.009624 0.00968266 0.0103004 ]
|
|
|
|
mean value: 0.008699941635131835
|
|
|
|
key: test_mcc
|
|
value: [ 0.4472136 0.33333333 0.70710678 0. nan nan
|
|
-0.4472136 1. 0.61237244 0.61237244]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.58333333 0.55339859 0.58080232 0.71151247 0.58789635 0.71151247
|
|
0.58789635 0.65824263 0.53035501 0.57140596]
|
|
|
|
mean value: 0.6076355494357508
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.66666667 0.83333333 0.5 nan nan
|
|
0.33333333 1. 0.8 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.76923077 0.76923077 0.78846154 0.84615385 0.78846154 0.84615385
|
|
0.78846154 0.82692308 0.75471698 0.77358491]
|
|
|
|
mean value: 0.7951378809869376
|
|
|
|
key: test_fscore
|
|
value: [0.5 0.66666667 0.85714286 0. nan nan
|
|
0.5 1. 0.66666667 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.71428571 0.73913043 0.7755102 0.82608696 0.76595745 0.82608696
|
|
0.76595745 0.81632653 0.72340426 0.72727273]
|
|
|
|
mean value: 0.7680018673014577
|
|
|
|
key: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
test_precision
|
|
value: [1. 0.66666667 0.75 0. nan nan
|
|
0.4 1. 1. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.9375 0.85 0.82608696 0.95 0.85714286 0.95
|
|
0.85714286 0.86956522 0.85 0.88888889]
|
|
|
|
mean value: 0.8836326777087646
|
|
|
|
key: test_recall
|
|
value: [0.33333333 0.66666667 1. 0. nan nan
|
|
0.66666667 1. 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.57692308 0.65384615 0.73076923 0.73076923 0.69230769 0.73076923
|
|
0.69230769 0.76923077 0.62962963 0.61538462]
|
|
|
|
mean value: 0.6821937321937321
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.66666667 0.83333333 0.5 nan nan
|
|
0.33333333 1. 0.75 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.76923077 0.76923077 0.78846154 0.84615385 0.78846154 0.84615385
|
|
0.78846154 0.82692308 0.75712251 0.77065527]
|
|
|
|
mean value: 0.7950854700854701
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 0.5 0.75 0. nan nan
|
|
0.33333333 1. 0.5 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.55555556 0.5862069 0.63333333 0.7037037 0.62068966 0.7037037
|
|
0.62068966 0.68965517 0.56666667 0.57142857]
|
|
|
|
mean value: 0.6251632913701879
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00861406 0.00843453 0.00842524 0.00845695 0.00851583 0.00869155
|
|
0.01020479 0.008883 0.0094862 0.00905514]
|
|
|
|
mean value: 0.008876729011535644
|
|
|
|
key: score_time
|
|
value: [0.00841975 0.00838161 0.00835943 0.00834155 0.00417018 0.00427651
|
|
0.0093472 0.00916672 0.00854087 0.00875092]
|
|
|
|
mean value: 0.0077754735946655275
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0.33333333 0.4472136 0.4472136 nan nan
|
|
-0.4472136 0.70710678 0.61237244 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.9258201 0.89056356 0.9258201 0.89056356 0.92307692 0.9258201
|
|
0.84615385 0.80829038 0.89271208 0.89227454]
|
|
|
|
mean value: 0.8921095178279635
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.66666667 0.66666667 0.66666667 nan nan
|
|
0.33333333 0.83333333 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.96153846 0.94230769 0.96153846 0.94230769 0.96153846 0.96153846
|
|
0.92307692 0.90384615 0.94339623 0.94339623]
|
|
|
|
mean value: 0.9444484760522497
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.66666667 0.75 0.5 nan nan
|
|
0.5 0.8 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.96 0.93877551 0.96 0.93877551 0.96153846 0.96
|
|
0.92307692 0.90566038 0.94117647 0.93877551]
|
|
|
|
mean value: 0.9427778763174356
|
|
|
|
key: test_precision
|
|
value: [0.5 0.66666667 0.6 1. nan nan
|
|
0.4 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 0.96153846 1.
|
|
0.92307692 0.88888889 1. 1. ]
|
|
|
|
mean value: 0.9773504273504273
|
|
|
|
key: test_recall
|
|
value: [0.33333333 0.66666667 1. 0.33333333 nan nan
|
|
0.66666667 0.66666667 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.92307692 0.88461538 0.92307692 0.88461538 0.96153846 0.92307692
|
|
0.92307692 0.92307692 0.88888889 0.88461538]
|
|
|
|
mean value: 0.911965811965812
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.66666667 0.66666667 0.66666667 nan nan
|
|
0.33333333 0.83333333 0.75 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.96153846 0.94230769 0.96153846 0.94230769 0.96153846 0.96153846
|
|
0.92307692 0.90384615 0.94444444 0.94230769]
|
|
|
|
mean value: 0.9444444444444444
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.5 0.6 0.33333333 nan nan
|
|
0.33333333 0.66666667 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.92307692 0.88461538 0.92307692 0.88461538 0.92592593 0.92307692
|
|
0.85714286 0.82758621 0.88888889 0.88461538]
|
|
|
|
mean value: 0.8922620801931147
|
|
|
|
MCC on Blind test: -0.07
|
|
|
|
Accuracy on Blind test: 0.45
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.257622 0.25290346 0.26910257 0.36610937 0.23078704 0.28138971
|
|
0.24945307 0.25755453 0.2490387 0.25744033]
|
|
|
|
mean value: 0.2671400785446167
|
|
|
|
key: score_time
|
|
value: [0.01196766 0.01187682 0.01183176 0.01191616 0.0065763 0.00656652
|
|
0.01190734 0.01188445 0.0118742 0.01191521]
|
|
|
|
mean value: 0.010831642150878906
|
|
|
|
key: test_mcc
|
|
value: [0.33333333 0.33333333 0.4472136 0.70710678 nan nan
|
|
0. 1. 0.61237244 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.66666667 0.66666667 0.83333333 nan nan
|
|
0.5 1. 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.66666667 0.75 0.8 nan nan
|
|
0.4 1. 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.66666667 0.6 1. nan nan
|
|
0.5 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.66666667 1. 0.66666667 nan nan
|
|
0.33333333 1. 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.66666667 0.66666667 0.83333333 nan nan
|
|
0.5 1. 0.75 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.5 0.6 0.66666667 nan nan
|
|
0.25 1. 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01336122 0.01300812 0.00985575 0.0098536 0.00926661 0.00887156
|
|
0.00901055 0.00941229 0.00927663 0.00903344]
|
|
|
|
mean value: 0.010094976425170899
|
|
|
|
key: score_time
|
|
value: [0.0113616 0.0098033 0.00913811 0.00857115 0.00427389 0.00418425
|
|
0.00839829 0.00844026 0.00838804 0.00840545]
|
|
|
|
mean value: 0.008096432685852051
|
|
|
|
key: test_mcc
|
|
value: [0.33333333 1. 0.70710678 1. nan nan
|
|
0.70710678 0.70710678 0.61237244 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 1. 0.83333333 1. nan nan
|
|
0.83333333 0.83333333 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 1. 0.8 1. nan nan
|
|
0.85714286 0.8 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 1. 1. 1. nan nan
|
|
0.75 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 1. 0.66666667 1. nan nan
|
|
1. 0.66666667 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 1. 0.83333333 1. nan nan
|
|
0.83333333 0.83333333 0.75 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 1. 0.66666667 1. nan nan
|
|
0.75 0.66666667 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.73
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.07744193 0.0774622 0.07790446 0.07826352 0.07741332 0.07785106
|
|
0.07745719 0.07764459 0.07778835 0.07841229]
|
|
|
|
mean value: 0.07776389122009278
|
|
|
|
key: score_time
|
|
value: [0.01661301 0.0167923 0.01716065 0.01694202 0.00452185 0.00454688
|
|
0.0172863 0.01668048 0.016675 0.01684666]
|
|
|
|
mean value: 0.014406514167785645
|
|
|
|
key: test_mcc
|
|
value: [0. 0.33333333 0.70710678 0.33333333 nan nan
|
|
0.4472136 0.70710678 0.61237244 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.66666667 0.83333333 0.66666667 nan nan
|
|
0.66666667 0.83333333 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.66666667 0.85714286 0.66666667 nan nan
|
|
0.75 0.8 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5 0.66666667 0.75 0.66666667 nan nan
|
|
0.6 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 0.66666667 1. 0.66666667 nan nan
|
|
1. 0.66666667 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.66666667 0.83333333 0.66666667 nan nan
|
|
0.66666667 0.83333333 0.75 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.5 0.75 0.5 nan nan
|
|
0.6 0.66666667 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.57
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00849891 0.00832915 0.0084703 0.00833631 0.00853562 0.00831246
|
|
0.00834322 0.00833392 0.00853539 0.00829196]
|
|
|
|
mean value: 0.008398723602294923
|
|
|
|
key: score_time
|
|
value: [0.00839496 0.00838709 0.00878024 0.0090816 0.00428891 0.00421572
|
|
0.00838637 0.00845718 0.00835276 0.00842619]
|
|
|
|
mean value: 0.007677102088928222
|
|
|
|
key: test_mcc
|
|
value: [0. 0.33333333 0.4472136 0. nan nan
|
|
0.4472136 0. 0.61237244 0.61237244]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.66666667 0.66666667 0.5 nan nan
|
|
0.66666667 0.5 0.8 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.66666667 0.75 0.57142857 nan nan
|
|
0.5 0.4 0.66666667 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5 0.66666667 0.6 0.5 nan nan
|
|
1. 0.5 1. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 0.66666667 1. 0.66666667 nan nan
|
|
0.33333333 0.33333333 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.66666667 0.66666667 0.5 nan nan
|
|
0.66666667 0.5 0.75 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.5 0.6 0.4 nan nan
|
|
0.33333333 0.25 0.5 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.06
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.9603796 0.96432114 0.96725059 0.96787596 0.9631319 0.97312737
|
|
0.96518373 0.96293473 0.96418524 0.96683455]
|
|
|
|
mean value: 0.9655224800109863
|
|
|
|
key: score_time
|
|
value: [0.14033937 0.08650279 0.0865438 0.08715081 0.0047214 0.0044682
|
|
0.08628559 0.08696723 0.08689618 0.08648324]
|
|
|
|
mean value: 0.07563586235046386
|
|
|
|
key: test_mcc
|
|
value: [0.70710678 0.70710678 1. 0.70710678 nan nan
|
|
0.70710678 0.70710678 0.16666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.83333333 1. 0.83333333 nan nan
|
|
0.83333333 0.83333333 0.6 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.85714286 1. 0.8 nan nan
|
|
0.85714286 0.8 0.5 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.75 1. 1. nan nan 0.75 1. 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 1. 1. 0.66666667 nan nan
|
|
1. 0.66666667 0.5 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.83333333 1. 0.83333333 nan nan
|
|
0.83333333 0.83333333 0.58333333 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.75 1. 0.66666667 nan nan
|
|
0.75 0.66666667 0.33333333 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.83453751 0.86814737 0.89418793 0.84960437 0.85452509 0.79625249
|
|
0.84640026 0.8471365 0.86017728 0.86971331]
|
|
|
|
mean value: 0.8520682096481323
|
|
|
|
key: score_time
|
|
value: [0.18317986 0.18850303 0.23107004 0.17553663 0.00471902 0.00476313
|
|
0.23304439 0.20270252 0.2285862 0.14570665]
|
|
|
|
mean value: 0.1597811460494995
|
|
|
|
key: test_mcc
|
|
value: [0. 0.70710678 1. 0.70710678 nan nan
|
|
0.70710678 1. 0.16666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.96225045 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9962250448649377
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.83333333 1. 0.83333333 nan nan
|
|
0.83333333 1. 0.6 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.98076923 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9980769230769231
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.85714286 1. 0.8 nan nan
|
|
0.85714286 1. 0.5 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.98039216 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9980392156862745
|
|
|
|
key: test_precision
|
|
value: [0.5 0.75 1. 1. nan nan 0.75 1. 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 1. 1. 0.66666667 nan nan
|
|
1. 1. 0.5 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[0.96153846 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9961538461538462
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.83333333 1. 0.83333333 nan nan
|
|
0.83333333 1. 0.58333333 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.98076923 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9980769230769231
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.75 1. 0.66666667 nan nan
|
|
0.75 1. 0.33333333 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.96153846 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9961538461538462
|
|
|
|
MCC on Blind test: 0.31
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02309561 0.00960851 0.00958014 0.00926995 0.00950646 0.01018548
|
|
0.00907755 0.00863814 0.00857759 0.00869775]
|
|
|
|
mean value: 0.010623717308044433
|
|
|
|
key: score_time
|
|
value: [0.01342392 0.00879192 0.01012683 0.00904608 0.00474715 0.00480556
|
|
0.00867152 0.00860524 0.0085516 0.00846505]
|
|
|
|
mean value: 0.00852348804473877
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0. 0.33333333 0. nan nan
|
|
0.70710678 0.70710678 -0.40824829 0.61237244]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.69436507 0.71151247 0.70064905 0.77849894 0.77151675 0.81312325
|
|
0.80829038 0.77151675 0.71778392 0.77540056]
|
|
|
|
mean value: 0.7542657149596319
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.5 0.66666667 0.5 nan nan
|
|
0.83333333 0.83333333 0.4 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.84615385 0.84615385 0.84615385 0.88461538 0.88461538 0.90384615
|
|
0.90384615 0.88461538 0.8490566 0.88679245]
|
|
|
|
mean value: 0.8735849056603774
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.4 0.66666667 0.4 nan nan
|
|
0.85714286 0.8 0. 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.84 0.82608696 0.83333333 0.875 0.88888889 0.89795918
|
|
0.90196078 0.88 0.83333333 0.88 ]
|
|
|
|
mean value: 0.865656248006449
|
|
|
|
key: test_precision
|
|
value: [0.5 0.5 0.66666667 0.5 nan nan
|
|
0.75 1. 0. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.875 0.95 0.90909091 0.95454545 0.85714286 0.95652174
|
|
0.92 0.91666667 0.95238095 0.91666667]
|
|
|
|
mean value: 0.9208015245623942
|
|
|
|
key: test_recall
|
|
value: [0.33333333 0.33333333 0.66666667 0.33333333 nan nan
|
|
1. 0.66666667 0. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.80769231 0.73076923 0.76923077 0.80769231 0.92307692 0.84615385
|
|
0.88461538 0.84615385 0.74074074 0.84615385]
|
|
|
|
mean value: 0.8202279202279202
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.5 0.66666667 0.5 nan nan
|
|
0.83333333 0.83333333 0.33333333 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.84615385 0.84615385 0.84615385 0.88461538 0.88461538 0.90384615
|
|
0.90384615 0.88461538 0.8511396 0.88603989]
|
|
|
|
mean value: 0.8737179487179488
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.25 0.5 0.25 nan nan
|
|
0.75 0.66666667 0. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.72413793 0.7037037 0.71428571 0.77777778 0.8 0.81481481
|
|
0.82142857 0.78571429 0.71428571 0.78571429]
|
|
|
|
mean value: 0.764186279875935
|
|
|
|
MCC on Blind test: -0.03
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.04081035 0.03964376 0.03409052 0.03344679 0.036654 0.0408752
|
|
0.04086256 0.03915644 0.03814292 0.04359317]
|
|
|
|
mean value: 0.03872756958007813
|
|
|
|
key: score_time
|
|
value: [0.01037478 0.01039386 0.01099706 0.01184702 0.0050633 0.00477147
|
|
0.01168633 0.01027298 0.01030707 0.01144981]
|
|
|
|
mean value: 0.009716367721557618
|
|
|
|
key: test_mcc
|
|
value: [1. 1. 0.70710678 1. nan nan
|
|
0.70710678 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 1. 0.83333333 1. nan nan
|
|
0.83333333 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 1. 0.8 1. nan nan
|
|
0.85714286 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. nan nan 0.75 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.66666667 1. nan nan
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 1. 0.83333333 1. nan nan
|
|
0.83333333 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 1. 0.66666667 1. nan nan
|
|
0.75 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.84
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03194928 0.03370738 0.03454018 0.0320971 0.03061795 0.03505969
|
|
0.03510022 0.03437901 0.04472756 0.04004741]
|
|
|
|
mean value: 0.03522257804870606
|
|
|
|
key: score_time
|
|
value: [0.02213168 0.0220046 0.02188015 0.0226388 0.00919867 0.00595093
|
|
0.02261281 0.022228 0.02384186 0.02255106]
|
|
|
|
mean value: 0.019503855705261232
|
|
|
|
key: test_mcc
|
|
value: [0.70710678 0.70710678 0. 1. nan nan
|
|
1. 0.33333333 1. 0.61237244]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.96225045 1. 1. 1. 1. 0.92307692
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9885327371726299
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.83333333 0.5 1. nan nan
|
|
1. 0.66666667 1. 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.98076923 1. 1. 1. 1. 0.96153846
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9942307692307693
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.85714286 0.57142857 1. nan nan
|
|
1. 0.66666667 1. 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.98113208 1. 1. 1. 1. 0.96153846
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.994267053701016
|
|
|
|
key: test_precision
|
|
value: [0.75 0.75 0.5 1. nan nan
|
|
1. 0.66666667 1. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.96296296 1. 1. 1. 1. 0.96153846
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9924501424501424
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.66666667 1. nan nan
|
|
1. 0.66666667 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 0.96153846
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9961538461538462
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.83333333 0.5 1. nan nan
|
|
1. 0.66666667 1. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.98076923 1. 1. 1. 1. 0.96153846
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9942307692307693
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.75 0.4 1. nan nan 1. 0.5 1. 0.75]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.96296296 1. 1. 1. 1. 0.92592593
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9888888888888889
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0098958 0.00960755 0.008389 0.00844193 0.00868154 0.00891495
|
|
0.00903392 0.00872874 0.00954485 0.00864506]
|
|
|
|
mean value: 0.008988332748413087
|
|
|
|
key: score_time
|
|
value: [0.00995326 0.00929236 0.00838757 0.00841975 0.00438452 0.00442958
|
|
0.00843525 0.00857377 0.00896025 0.00875926]
|
|
|
|
mean value: 0.007959556579589844
|
|
|
|
key: test_mcc
|
|
value: [0. 0.33333333 0.4472136 0. nan nan
|
|
0. 1. 0.61237244 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [0.70064905 0.57735027 0.73131034 0.77151675 0.6172134 0.73131034
|
|
0.6172134 0.65433031 0.70042867 0.77540056]
|
|
|
|
mean value: 0.6876723088831835
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.66666667 0.66666667 0.5 nan nan
|
|
0.5 1. 0.8 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [0.84615385 0.78846154 0.86538462 0.88461538 0.80769231 0.86538462
|
|
0.80769231 0.82692308 0.8490566 0.88679245]
|
|
|
|
mean value: 0.8428156748911466
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.66666667 0.75 0.4 nan nan
|
|
0.57142857 1. 0.66666667 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [0.83333333 0.78431373 0.8627451 0.88 0.8 0.8627451
|
|
0.8 0.82352941 0.84615385 0.88 ]
|
|
|
|
mean value: 0.8372820512820512
|
|
|
|
key: test_precision
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[0.5 0.66666667 0.6 0.5 nan nan
|
|
0.5 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [0.90909091 0.8 0.88 0.91666667 0.83333333 0.88
|
|
0.83333333 0.84 0.88 0.91666667]
|
|
|
|
mean value: 0.8689090909090909
|
|
|
|
key: test_recall
|
|
value: [0.33333333 0.66666667 1. 0.33333333 nan nan
|
|
0.66666667 1. 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [0.76923077 0.76923077 0.84615385 0.84615385 0.76923077 0.84615385
|
|
0.76923077 0.80769231 0.81481481 0.84615385]
|
|
|
|
mean value: 0.8084045584045584
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.66666667 0.66666667 0.5 nan nan
|
|
0.5 1. 0.75 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [0.84615385 0.78846154 0.86538462 0.88461538 0.80769231 0.86538462
|
|
0.80769231 0.82692308 0.8497151 0.88603989]
|
|
|
|
mean value: 0.8428062678062679
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.5 0.6 0.25 nan nan 0.4 1. 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [0.71428571 0.64516129 0.75862069 0.78571429 0.66666667 0.75862069
|
|
0.66666667 0.7 0.73333333 0.78571429]
|
|
|
|
mean value: 0.7214783622013877
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00921822 0.01226902 0.01219916 0.01230478 0.01273918 0.01236653
|
|
0.01281667 0.01253247 0.01347136 0.0127697 ]
|
|
|
|
mean value: 0.012268710136413574
|
|
|
|
key: score_time
|
|
value: [0.00918436 0.01127267 0.01138401 0.01152658 0.00610924 0.00630403
|
|
0.01162601 0.01155329 0.01138902 0.01136518]
|
|
|
|
mean value: 0.01017143726348877
|
|
|
|
key: test_mcc
|
|
value: [0.33333333 0.70710678 0.4472136 0.70710678 nan nan
|
|
0.4472136 1. 1. 0.61237244]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 0.89056356 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9890563556561721
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.83333333 0.66666667 0.83333333 nan nan
|
|
0.66666667 1. 1. 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.94230769 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9942307692307693
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.8 0.75 0.8 nan nan
|
|
0.5 1. 1. 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 0.93877551 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9938775510204082
|
|
|
|
key: test_precision
|
|
value: [0.66666667 1. 0.6 1. nan nan
|
|
1. 1. 1. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.66666667 1. 0.66666667 nan nan
|
|
0.33333333 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 0.88461538 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9884615384615385
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.83333333 0.66666667 0.83333333 nan nan
|
|
0.66666667 1. 1. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.94230769 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9942307692307693
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.66666667 0.6 0.66666667 nan nan
|
|
0.33333333 1. 1. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 0.88461538 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9884615384615385
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01245952 0.01190948 0.01206875 0.01187015 0.01200151 0.01223326
|
|
0.01400733 0.01236725 0.01193714 0.01187801]
|
|
|
|
mean value: 0.012273240089416503
|
|
|
|
key: score_time
|
|
value: [0.0105114 0.01141214 0.01137328 0.01140285 0.00604272 0.0061481
|
|
0.01152825 0.01161933 0.01146436 0.0113945 ]
|
|
|
|
mean value: 0.010289692878723144
|
|
|
|
key: test_mcc
|
|
value: [0.70710678 0.70710678 0.4472136 0.70710678 nan nan
|
|
0.4472136 0.70710678 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.54772256 0.79056942 1. 0.92704716]
|
|
|
|
mean value: 0.926533913727155
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.83333333 0.66666667 0.83333333 nan nan
|
|
0.66666667 0.83333333 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.73076923 0.88461538 1. 0.96226415]
|
|
|
|
mean value: 0.9577648766328012
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.85714286 0.75 0.8 nan nan
|
|
0.5 0.8 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.63157895 0.86956522 1. 0.96 ]
|
|
|
|
mean value: 0.9461144164759725
|
|
|
|
key: test_precision
|
|
value: [0.75 0.75 0.6 1. nan nan 1. 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.66666667 nan nan
|
|
0.33333333 0.66666667 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.46153846 0.76923077 1. 0.92307692]
|
|
|
|
mean value: 0.9153846153846154
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.83333333 0.66666667 0.83333333 nan nan
|
|
0.66666667 0.83333333 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.73076923 0.88461538 1. 0.96153846]
|
|
|
|
mean value: 0.9576923076923077
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.75 0.6 0.66666667 nan nan
|
|
0.33333333 0.66666667 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.46153846 0.76923077 1. 0.92307692]
|
|
|
|
mean value: 0.9153846153846154
|
|
|
|
MCC on Blind test: 0.24
|
|
|
|
Accuracy on Blind test: 0.57
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08268571 0.07317615 0.07111573 0.07205319 0.07330489 0.07189631
|
|
0.07267833 0.07267451 0.07413673 0.07375813]
|
|
|
|
mean value: 0.07374796867370606
|
|
|
|
key: score_time
|
|
value: [0.01459098 0.01475048 0.01495194 0.0147922 0.00463033 0.00474453
|
|
0.01556635 0.01505351 0.01488471 0.01495194]
|
|
|
|
mean value: 0.012891697883605956
|
|
|
|
key: test_mcc
|
|
value: [1. 1. 0.70710678 1. nan nan
|
|
0.70710678 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 1. 0.83333333 1. nan nan
|
|
0.83333333 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 1. 0.8 1. nan nan
|
|
0.85714286 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. nan nan 0.75 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.66666667 1. nan nan
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 1. 0.83333333 1. nan nan
|
|
0.83333333 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 1. 0.66666667 1. nan nan
|
|
0.75 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.73
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03236103 0.03303695 0.03448248 0.02975869 0.04348493 0.04276633
|
|
0.04121518 0.0335412 0.02573681 0.03820777]
|
|
|
|
mean value: 0.035459136962890624
|
|
|
|
key: score_time
|
|
value: [0.02469873 0.02373099 0.02471375 0.0250423 0.00463319 0.00474143
|
|
0.01605988 0.02687597 0.01719236 0.02195811]
|
|
|
|
mean value: 0.018964672088623048
|
|
|
|
key: test_mcc
|
|
value: [1. 1. 0.70710678 0.70710678 nan nan
|
|
0.70710678 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 0.96225045 0.96225045 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9924500897298753
|
|
|
|
key: test_accuracy
|
|
value: [1. 1. 0.83333333 0.83333333 nan nan
|
|
0.83333333 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.98076923 0.98076923 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9961538461538462
|
|
|
|
key: test_fscore
|
|
value: [1. 1. 0.8 0.8 nan nan
|
|
0.85714286 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 0.98039216 0.98039216 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.996078431372549
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. nan nan 0.75 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.66666667 0.66666667 nan nan
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 0.96153846 0.96153846 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9923076923076923
|
|
|
|
key: test_roc_auc
|
|
value: [1. 1. 0.83333333 0.83333333 nan nan
|
|
0.83333333 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
train_roc_auc
|
|
value: [1. 0.98076923 0.98076923 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9961538461538462
|
|
|
|
key: test_jcc
|
|
value: [1. 1. 0.66666667 0.66666667 nan nan
|
|
0.75 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 0.96153846 0.96153846 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9923076923076923
|
|
|
|
MCC on Blind test: 0.95
|
|
|
|
Accuracy on Blind test: 0.98
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01034498 0.01023507 0.01016378 0.010077 0.01010346 0.01009202
|
|
0.01004457 0.01008701 0.01006556 0.01012492]
|
|
|
|
mean value: 0.010133838653564453
|
|
|
|
key: score_time
|
|
value: [0.00860214 0.0086844 0.00934911 0.00860739 0.00424981 0.00425982
|
|
0.00847292 0.00851202 0.00853586 0.008564 ]
|
|
|
|
mean value: 0.007783746719360352
|
|
|
|
key: test_mcc
|
|
value: [-0.4472136 0.33333333 0.33333333 0. nan nan
|
|
0. 0.4472136 0.16666667 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.33333333 0.66666667 0.66666667 0.5 nan nan
|
|
0.5 0.66666667 0.6 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0. 0.66666667 0.66666667 0. nan nan
|
|
0.57142857 0.5 0.5 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0. 0.66666667 0.66666667 0. nan nan
|
|
0.5 1. 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0. 0.66666667 0.66666667 0. nan nan
|
|
0.66666667 0.33333333 0.5 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.33333333 0.66666667 0.66666667 0.5 nan nan
|
|
0.5 0.66666667 0.58333333 0.83333333]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0. 0.5 0.5 0. nan nan
|
|
0.4 0.33333333 0.33333333 0.66666667]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.13452578 0.13629317 0.09958744 0.12072825 0.12380791 0.09092355
|
|
0.10838723 0.11978316 0.1374228 0.13621712]
|
|
|
|
mean value: 0.12076764106750489
|
|
|
|
key: score_time
|
|
value: [0.00908279 0.00892663 0.00909948 0.00916409 0.00457883 0.00456905
|
|
0.00913429 0.00951099 0.00891829 0.00896835]
|
|
|
|
mean value: 0.008195281028747559
|
|
|
|
key: test_mcc
|
|
value: [0.70710678 0.70710678 0.70710678 1. nan nan
|
|
0.70710678 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.83333333 0.83333333 1. nan nan
|
|
0.83333333 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.85714286 0.8 1. nan nan
|
|
0.85714286 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.75 1. 1. nan nan 0.75 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 1. 0.66666667 1. nan nan
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.83333333 0.83333333 1. nan nan
|
|
0.83333333 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.75 0.66666667 1. nan nan
|
|
0.75 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.89
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.00955558 0.00891328 0.00895953 0.00941253 0.00888014 0.00890732
|
|
0.00881815 0.00965381 0.00900769 0.01232696]
|
|
|
|
mean value: 0.00944349765777588
|
|
|
|
key: score_time
|
|
value: [0.00867105 0.00853944 0.00854969 0.00861311 0.00435209 0.00431705
|
|
0.0086236 0.00882101 0.0087564 0.01142502]
|
|
|
|
mean value: 0.008066844940185548
|
|
|
|
key: test_mcc
|
|
value: [ 0.70710678 -0.4472136 0.70710678 0.33333333 nan nan
|
|
-0.4472136 0. 0.61237244 0.61237244]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.33333333 0.83333333 0.66666667 nan nan
|
|
0.33333333 0.5 0.8 0.8 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0. 0.85714286 0.66666667 nan nan
|
|
0.5 0.57142857 0.66666667 0.85714286]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0. 0.75 0.66666667 nan nan
|
|
0.4 0.5 1. 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0. 1. 0.66666667 nan nan
|
|
0.66666667 0.66666667 0.5 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.33333333 0.83333333 0.66666667 nan nan
|
|
0.33333333 0.5 0.75 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0. 0.75 0.5 nan nan
|
|
0.33333333 0.4 0.5 0.75 ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0125978 0.01234961 0.01238513 0.01238847 0.01241469 0.01236916
|
|
0.02667212 0.01334715 0.01305318 0.0137136 ]
|
|
|
|
mean value: 0.014129090309143066
|
|
|
|
key: score_time
|
|
value: [0.01133299 0.01127529 0.01127625 0.01128983 0.00598621 0.00598669
|
|
0.01129127 0.01196098 0.0119524 0.01206422]
|
|
|
|
mean value: 0.01044161319732666
|
|
|
|
key: test_mcc
|
|
value: [0.33333333 1. 0.4472136 1. nan nan
|
|
0.70710678 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 1. 0.66666667 1. nan nan
|
|
0.83333333 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 1. 0.75 1. nan nan
|
|
0.8 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 1. 0.6 1. nan nan
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 1. 1. 1. nan nan
|
|
0.66666667 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 1. 0.66666667 1. nan nan
|
|
0.83333333 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 1. 0.6 1. nan nan
|
|
0.66666667 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['XDR'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details:
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call
|
|
return cache[method]
|
|
KeyError: 'predict'
|
|
|
|
During handling of the above exception, another exception occurred:
|
|
|
|
Traceback (most recent call last):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
|
|
scores = scorer(estimator, X_test, y_test)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__
|
|
score = scorer._score(cached_call, estimator, *args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score
|
|
y_pred = method_caller(estimator, "predict", X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call
|
|
result = getattr(estimator, method)(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict
|
|
Xt = transform.transform(Xt)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform
|
|
Xs = self._fit_transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform
|
|
return Parallel(n_jobs=self.n_jobs)(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__
|
|
while self.dispatch_one_batch(iterator):
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch
|
|
self._dispatch(tasks)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch
|
|
job = self._backend.apply_async(batch, callback=cb)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
|
|
result = ImmediateResult(func)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__
|
|
self.results = batch()
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
|
|
return [func(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__
|
|
return self.function(*args, **kwargs)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one
|
|
res = transformer.transform(X)
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
|
|
X_int, X_mask = self._transform(
|
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
|
|
raise ValueError(msg)
|
|
ValueError: Found unknown categories ['Other'] in column 5 during transform
|
|
|
|
warnings.warn(
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:168: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:171: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.08033776 0.07908654 0.07868695 0.07865906 0.07928276 0.08904433
|
|
0.09892082 0.08086753 0.07872057 0.07906723]
|
|
|
|
mean value: 0.08226735591888427
|
|
|
|
key: score_time
|
|
value: [0.01172042 0.01198864 0.01163435 0.01175618 0.00623584 0.00627875
|
|
0.01186275 0.01175117 0.01167297 0.01161075]
|
|
|
|
mean value: 0.010651183128356934
|
|
|
|
key: test_mcc
|
|
value: [0. 1. 0.4472136 0.70710678 nan nan
|
|
0.33333333 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.5 1. 0.66666667 0.83333333 nan nan
|
|
0.66666667 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 1. 0.75 0.8 nan nan
|
|
0.66666667 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5 1. 0.6 1. nan nan
|
|
0.66666667 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 1. 1. 0.66666667 nan nan
|
|
0.66666667 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 1. 0.66666667 0.83333333 nan nan
|
|
0.66666667 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.4 1. 0.6 0.66666667 nan nan
|
|
0.5 1. 1. 1. ]
|
|
|
|
mean value: nan
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02765584 0.02437687 0.02343535 0.02426672 0.02423263 0.0224123
|
|
0.02587819 0.02598929 0.02278066 0.02541733]
|
|
|
|
mean value: 0.02464451789855957
|
|
|
|
key: score_time
|
|
value: [0.01162863 0.01157403 0.01157665 0.01159024 0.01161504 0.01156235
|
|
0.01163483 0.0118084 0.011554 0.0116539 ]
|
|
|
|
mean value: 0.011619806289672852
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 0.6 0.21821789 0.40824829 0.81649658 0.65465367
|
|
0.81649658 0.65465367 0.40824829 0.40824829]
|
|
|
|
mean value: 0.5639916935606966
|
|
|
|
key: train_mcc
|
|
value: [0.91201231 0.93356387 0.84465303 0.97801929 0.93356387 0.91111111
|
|
0.91201231 0.91201231 0.93356387 0.97801929]
|
|
|
|
mean value: 0.9248531267284181
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.8 0.6 0.7 0.9 0.8 0.9 0.8 0.7 0.7]
|
|
|
|
mean value: 0.77
|
|
|
|
key: train_accuracy
|
|
value: [0.95555556 0.96666667 0.92222222 0.98888889 0.96666667 0.95555556
|
|
0.95555556 0.95555556 0.96666667 0.98888889]
|
|
|
|
mean value: 0.9622222222222223
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.8 0.5 0.72727273 0.90909091 0.75
|
|
0.90909091 0.83333333 0.66666667 0.66666667]
|
|
|
|
mean value: 0.7595454545454545
|
|
|
|
key: train_fscore
|
|
value: [0.95652174 0.96703297 0.92134831 0.98901099 0.96703297 0.95555556
|
|
0.95652174 0.95652174 0.96703297 0.98901099]
|
|
|
|
mean value: 0.962558996667448
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.8 0.66666667 0.66666667 0.83333333 1.
|
|
0.83333333 0.71428571 0.75 0.75 ]
|
|
|
|
mean value: 0.7728571428571429
|
|
|
|
key: train_precision
|
|
value: [0.93617021 0.95652174 0.93181818 0.97826087 0.95652174 0.95555556
|
|
0.93617021 0.93617021 0.95652174 0.97826087]
|
|
|
|
mean value: 0.9521971332193349
|
|
|
|
key: test_recall
|
|
value: [1. 0.8 0.4 0.8 1. 0.6 1. 1. 0.6 0.6]
|
|
|
|
mean value: 0.78
|
|
|
|
key: train_recall
|
|
value: [0.97777778 0.97777778 0.91111111 1. 0.97777778 0.95555556
|
|
0.97777778 0.97777778 0.97777778 1. ]
|
|
|
|
mean value: 0.9733333333333333
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.8 0.6 0.7 0.9 0.8 0.9 0.8 0.7 0.7]
|
|
|
|
mean value: 0.77
|
|
|
|
key: train_roc_auc /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
|
|
value: [0.95555556 0.96666667 0.92222222 0.98888889 0.96666667 0.95555556
|
|
0.95555556 0.95555556 0.96666667 0.98888889]
|
|
|
|
mean value: 0.9622222222222223
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.66666667 0.33333333 0.57142857 0.83333333 0.6
|
|
0.83333333 0.71428571 0.5 0.5 ]
|
|
|
|
mean value: 0.6266666666666667
|
|
|
|
key: train_jcc
|
|
value: [0.91666667 0.93617021 0.85416667 0.97826087 0.93617021 0.91489362
|
|
0.91666667 0.91666667 0.93617021 0.97826087]
|
|
|
|
mean value: 0.928409266111625
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.57545948 0.71273303 0.62566566 0.53029251 0.58704925 0.67743158
|
|
0.58735538 0.63677311 0.5939045 0.59990978]
|
|
|
|
mean value: 0.6126574277877808
|
|
|
|
key: score_time
|
|
value: [0.01562166 0.01615906 0.01187754 0.01739883 0.01621699 0.01188135
|
|
0.01306725 0.01308417 0.01306224 0.01307702]
|
|
|
|
mean value: 0.014144611358642579
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 0.81649658 0.81649658 0.40824829 0.6 0.81649658
|
|
0.65465367 0.81649658 0.81649658 0.81649658]
|
|
|
|
mean value: 0.7216535117446173
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 0.95555556 1. 1. 0.95555556
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9911111111111112
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.9 0.9 0.7 0.8 0.9 0.8 0.9 0.9 0.9]
|
|
|
|
mean value: 0.85
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 0.97777778 1. 1. 0.97777778
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9955555555555555
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.90909091 0.90909091 0.72727273 0.8 0.88888889
|
|
0.83333333 0.90909091 0.88888889 0.90909091]
|
|
|
|
mean value: 0.8608080808080808
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 0.97777778 1. 1. 0.97777778
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9955555555555555
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.83333333 0.83333333 0.66666667 0.8 1.
|
|
0.71428571 0.83333333 1. 0.83333333]
|
|
|
|
mean value: 0.8228571428571428
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.97777778 1. 1. 0.97777778
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9955555555555555
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.8 0.8 0.8 1. 1. 0.8 1. ]
|
|
|
|
mean value: 0.92
|
|
|
|
key: train_recall
|
|
value: [1. 1. 0.97777778 1. 1. 0.97777778
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9955555555555555
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.9 0.9 0.7 0.8 0.9 0.8 0.9 0.9 0.9]
|
|
|
|
mean value: 0.8500000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 0.97777778 1. 1. 0.97777778
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9955555555555555
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.83333333 0.83333333 0.57142857 0.66666667 0.8
|
|
0.71428571 0.83333333 0.8 0.83333333]
|
|
|
|
mean value: 0.76
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 0.95652174 1. 1. 0.95652174
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.991304347826087
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01235056 0.01039577 0.00935602 0.00850844 0.00858593 0.00864339
|
|
0.00865698 0.00895977 0.0085566 0.00893736]
|
|
|
|
mean value: 0.009295082092285157
|
|
|
|
key: score_time
|
|
value: [0.01156569 0.00890326 0.00879598 0.00853562 0.00855756 0.00884724
|
|
0.0086143 0.00869632 0.0087235 0.00869799]
|
|
|
|
mean value: 0.008993744850158691
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0.5 0.21821789 0.21821789 -0.21821789 0.21821789
|
|
0.2 0.5 -0.5 0. ]
|
|
|
|
mean value: 0.11364357804719848
|
|
|
|
key: train_mcc
|
|
value: [0.56568542 0.64168895 0.57642872 0.53452248 0.48420012 0.58456547
|
|
0.69509522 0.77854709 0.53031442 0.8675239 ]
|
|
|
|
mean value: 0.6258571808190334
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.7 0.6 0.6 0.4 0.6 0.6 0.7 0.3 0.5]
|
|
|
|
mean value: 0.5499999999999999
|
|
|
|
key: train_accuracy
|
|
value: [0.76666667 0.81111111 0.77777778 0.72222222 0.73333333 0.77777778
|
|
0.84444444 0.88888889 0.75555556 0.93333333]
|
|
|
|
mean value: 0.8011111111111111
|
|
|
|
key: test_fscore
|
|
value: [0.61538462 0.76923077 0.66666667 0.66666667 0.5 0.66666667
|
|
0.6 0.76923077 0.46153846 0.54545455]
|
|
|
|
mean value: 0.6260839160839161
|
|
|
|
key: train_fscore
|
|
value: [0.8 0.83168317 0.80392157 0.7826087 0.76470588 0.80769231
|
|
0.83333333 0.88636364 0.78431373 0.93478261]
|
|
|
|
mean value: 0.8229404926524524
|
|
|
|
key: test_precision
|
|
value: [0.5 0.625 0.57142857 0.57142857 0.42857143 0.57142857
|
|
0.6 0.625 0.375 0.5 ]
|
|
|
|
mean value: 0.5367857142857143
|
|
|
|
key: train_precision
|
|
value: [0.7 0.75 0.71929825 0.64285714 0.68421053 0.71186441
|
|
0.8974359 0.90697674 0.70175439 0.91489362]
|
|
|
|
mean value: 0.7629290966174761
|
|
|
|
key: test_recall
|
|
value: [0.8 1. 0.8 0.8 0.6 0.8 0.6 1. 0.6 0.6]
|
|
|
|
mean value: 0.76
|
|
|
|
key: train_recall
|
|
value: [0.93333333 0.93333333 0.91111111 1. 0.86666667 0.93333333
|
|
0.77777778 0.86666667 0.88888889 0.95555556]
|
|
|
|
mean value: 0.9066666666666667
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.7 0.6 0.6 0.4 0.6 0.6 0.7 0.3 0.5]
|
|
|
|
mean value: 0.55
|
|
|
|
key: train_roc_auc
|
|
value: [0.76666667 0.81111111 0.77777778 0.72222222 0.73333333 0.77777778
|
|
0.84444444 0.88888889 0.75555556 0.93333333]
|
|
|
|
mean value: 0.8011111111111111
|
|
|
|
key: test_jcc
|
|
value: [0.44444444 0.625 0.5 0.5 0.33333333 0.5
|
|
0.42857143 0.625 0.3 0.375 ]
|
|
|
|
mean value: 0.4631349206349206
|
|
|
|
key: train_jcc
|
|
value: [0.66666667 0.71186441 0.67213115 0.64285714 0.61904762 0.67741935
|
|
0.71428571 0.79591837 0.64516129 0.87755102]
|
|
|
|
mean value: 0.7022902730094179
|
|
|
|
MCC on Blind test: 0.31
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00903225 0.00919151 0.00904417 0.00956297 0.00893402 0.00883603
|
|
0.00865221 0.00857472 0.00858474 0.00852299]
|
|
|
|
mean value: 0.008893561363220216
|
|
|
|
key: score_time
|
|
value: [0.00895262 0.00891352 0.00872064 0.00939298 0.00859714 0.00868654
|
|
0.00841737 0.00842023 0.00843048 0.00861788]
|
|
|
|
mean value: 0.008714938163757324
|
|
|
|
key: test_mcc
|
|
value: [0.40824829 0.6 0. 0.21821789 0.40824829 0.65465367
|
|
0.6 0.65465367 0.5 0. ]
|
|
|
|
mean value: 0.4044021812579673
|
|
|
|
key: train_mcc
|
|
value: [0.68888889 0.58137767 0.60540551 0.66683134 0.64700558 0.55776344
|
|
0.69162666 0.58137767 0.71269665 0.70004007]
|
|
|
|
mean value: 0.6433013479547345
|
|
|
|
key: test_accuracy
|
|
value: [0.7 0.8 0.5 0.6 0.7 0.8 0.8 0.8 0.7 0.5]
|
|
|
|
mean value: 0.69
|
|
|
|
key: train_accuracy
|
|
value: [0.84444444 0.78888889 0.8 0.83333333 0.82222222 0.77777778
|
|
0.84444444 0.78888889 0.85555556 0.84444444]
|
|
|
|
mean value: 0.82
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.8 0.44444444 0.66666667 0.66666667 0.75
|
|
0.8 0.83333333 0.57142857 0.28571429]
|
|
|
|
mean value: 0.6545526695526696
|
|
|
|
key: train_fscore
|
|
value: [0.84444444 0.77647059 0.78571429 0.83146067 0.81395349 0.76744186
|
|
0.8372093 0.77647059 0.85057471 0.82926829]
|
|
|
|
mean value: 0.8113008237276017
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.8 0.5 0.57142857 0.75 1.
|
|
0.8 0.71428571 1. 0.5 ]
|
|
|
|
mean value: 0.7302380952380952
|
|
|
|
key: train_precision
|
|
value: [0.84444444 0.825 0.84615385 0.84090909 0.85365854 0.80487805
|
|
0.87804878 0.825 0.88095238 0.91891892]
|
|
|
|
mean value: 0.851796404723234
|
|
|
|
key: test_recall
|
|
value: [0.8 0.8 0.4 0.8 0.6 0.6 0.8 1. 0.4 0.2]
|
|
|
|
mean value: 0.64
|
|
|
|
key: train_recall
|
|
value: [0.84444444 0.73333333 0.73333333 0.82222222 0.77777778 0.73333333
|
|
0.8 0.73333333 0.82222222 0.75555556]
|
|
|
|
mean value: 0.7755555555555556
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 0.8 0.5 0.6 0.7 0.8 0.8 0.8 0.7 0.5]
|
|
|
|
mean value: 0.6900000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [0.84444444 0.78888889 0.8 0.83333333 0.82222222 0.77777778
|
|
0.84444444 0.78888889 0.85555556 0.84444444]
|
|
|
|
mean value: 0.82
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.66666667 0.28571429 0.5 0.5 0.6
|
|
0.66666667 0.71428571 0.4 0.16666667]
|
|
|
|
mean value: 0.5071428571428571
|
|
|
|
key: train_jcc
|
|
value: [0.73076923 0.63461538 0.64705882 0.71153846 0.68627451 0.62264151
|
|
0.72 0.63461538 0.74 0.70833333]
|
|
|
|
mean value: 0.683584663763909
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.0083065 0.00893617 0.0082252 0.00910974 0.00897431 0.00820327
|
|
0.00906706 0.00823522 0.00894833 0.00931168]
|
|
|
|
mean value: 0.008731746673583984
|
|
|
|
key: score_time
|
|
value: [0.00974131 0.00983167 0.00941205 0.00987244 0.00959492 0.00921226
|
|
0.00989819 0.01411557 0.01460791 0.01599026]
|
|
|
|
mean value: 0.011227655410766601
|
|
|
|
key: test_mcc
|
|
value: [ 0.33333333 0.40824829 -0.21821789 0. 0.2 0.40824829
|
|
0.40824829 0. 0.33333333 0. ]
|
|
|
|
mean value: 0.18731936478222633
|
|
|
|
key: train_mcc
|
|
value: [0.60540551 0.57906602 0.53452248 0.69162666 0.68888889 0.57906602
|
|
0.51571581 0.53452248 0.46712826 0.51314236]
|
|
|
|
mean value: 0.5709084504401615
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.7 0.4 0.5 0.6 0.7 0.7 0.5 0.6 0.5]
|
|
|
|
mean value: 0.58
|
|
|
|
key: train_accuracy
|
|
value: [0.8 0.78888889 0.76666667 0.84444444 0.84444444 0.78888889
|
|
0.75555556 0.76666667 0.73333333 0.75555556]
|
|
|
|
mean value: 0.7844444444444445
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.72727273 0.25 0.61538462 0.6 0.66666667
|
|
0.72727273 0.54545455 0.33333333 0.28571429]
|
|
|
|
mean value: 0.5465384615384615
|
|
|
|
key: train_fscore
|
|
value: [0.8125 0.79569892 0.75862069 0.85106383 0.84444444 0.79569892
|
|
0.73809524 0.75862069 0.72727273 0.76595745]
|
|
|
|
mean value: 0.7847972915180865
|
|
|
|
key: test_precision
|
|
value: [0.55555556 0.66666667 0.33333333 0.5 0.6 0.75
|
|
0.66666667 0.5 1. 0.5 ]
|
|
|
|
mean value: 0.6072222222222222
|
|
|
|
key: train_precision
|
|
value: [0.76470588 0.77083333 0.78571429 0.81632653 0.84444444 0.77083333
|
|
0.79487179 0.78571429 0.74418605 0.73469388]
|
|
|
|
mean value: 0.7812323814439311
|
|
|
|
key: test_recall
|
|
value: [1. 0.8 0.2 0.8 0.6 0.6 0.8 0.6 0.2 0.2]
|
|
|
|
mean value: 0.58
|
|
|
|
key: train_recall
|
|
value: [0.86666667 0.82222222 0.73333333 0.88888889 0.84444444 0.82222222
|
|
0.68888889 0.73333333 0.71111111 0.8 ]
|
|
|
|
mean value: 0.7911111111111111
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 0.7 0.4 0.5 0.6 0.7 0.7 0.5 0.6 0.5]
|
|
|
|
mean value: 0.5800000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [0.8 0.78888889 0.76666667 0.84444444 0.84444444 0.78888889
|
|
0.75555556 0.76666667 0.73333333 0.75555556]
|
|
|
|
mean value: 0.7844444444444445
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.57142857 0.14285714 0.44444444 0.42857143 0.5
|
|
0.57142857 0.375 0.2 0.16666667]
|
|
|
|
mean value: 0.3955952380952381
|
|
|
|
key: train_jcc
|
|
value: [0.68421053 0.66071429 0.61111111 0.74074074 0.73076923 0.66071429
|
|
0.58490566 0.61111111 0.57142857 0.62068966]
|
|
|
|
mean value: 0.6476395178454898
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.57
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01041508 0.0102284 0.00914741 0.00940919 0.00931716 0.00927925
|
|
0.00927663 0.00954556 0.00942039 0.0091517 ]
|
|
|
|
mean value: 0.009519076347351075
|
|
|
|
key: score_time
|
|
value: [0.00887251 0.00886989 0.00857925 0.0089817 0.00945759 0.00883317
|
|
0.00878358 0.00884509 0.00862408 0.00881624]
|
|
|
|
mean value: 0.008866310119628906
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.6 0.21821789 0.21821789 0.6 0.65465367
|
|
0.40824829 0.65465367 0.2 0.5 ]
|
|
|
|
mean value: 0.4870487993279528
|
|
|
|
key: train_mcc
|
|
value: [0.8675239 0.86666667 0.85485041 0.86666667 0.84632727 0.85485041
|
|
0.79036782 0.84632727 0.82548988 0.81649658]
|
|
|
|
mean value: 0.8435566879416903
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.8 0.6 0.6 0.8 0.8 0.7 0.8 0.6 0.7]
|
|
|
|
mean value: 0.73
|
|
|
|
key: train_accuracy
|
|
value: [0.93333333 0.93333333 0.92222222 0.93333333 0.92222222 0.92222222
|
|
0.88888889 0.92222222 0.91111111 0.9 ]
|
|
|
|
mean value: 0.9188888888888889
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.8 0.5 0.66666667 0.8 0.75
|
|
0.66666667 0.83333333 0.6 0.57142857]
|
|
|
|
mean value: 0.7097186147186147
|
|
|
|
key: train_fscore
|
|
value: [0.93181818 0.93333333 0.91566265 0.93333333 0.91954023 0.91566265
|
|
0.87804878 0.91954023 0.90697674 0.88888889]
|
|
|
|
mean value: 0.9142805023022523
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.8 0.66666667 0.57142857 0.8 1.
|
|
0.75 0.71428571 0.6 1. ]
|
|
|
|
mean value: 0.7735714285714286
|
|
|
|
key: train_precision
|
|
value: [0.95348837 0.93333333 1. 0.93333333 0.95238095 1.
|
|
0.97297297 0.95238095 0.95121951 1. ]
|
|
|
|
mean value: 0.964910942868969
|
|
|
|
key: test_recall
|
|
value: [1. 0.8 0.4 0.8 0.8 0.6 0.6 1. 0.6 0.4]
|
|
|
|
mean value: 0.7
|
|
|
|
key: train_recall
|
|
value: [0.91111111 0.93333333 0.84444444 0.93333333 0.88888889 0.84444444
|
|
0.8 0.88888889 0.86666667 0.8 ]
|
|
|
|
mean value: 0.8711111111111112
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.8 0.6 0.6 0.8 0.8 0.7 0.8 0.6 0.7]
|
|
|
|
mean value: 0.73
|
|
|
|
key: train_roc_auc
|
|
value: [0.93333333 0.93333333 0.92222222 0.93333333 0.92222222 0.92222222
|
|
0.88888889 0.92222222 0.91111111 0.9 ]
|
|
|
|
mean value: 0.9188888888888889
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.66666667 0.33333333 0.5 0.66666667 0.6
|
|
0.5 0.71428571 0.42857143 0.4 ]
|
|
|
|
mean value: 0.5642857142857143
|
|
|
|
key: train_jcc
|
|
value: [0.87234043 0.875 0.84444444 0.875 0.85106383 0.84444444
|
|
0.7826087 0.85106383 0.82978723 0.8 ]
|
|
|
|
mean value: 0.8425752903689999
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.39881611 0.41650391 0.42689157 0.3460288 0.48951459 0.41404629
|
|
0.42462683 0.40188336 0.39558363 0.48087025]
|
|
|
|
mean value: 0.4194765329360962
|
|
|
|
key: score_time
|
|
value: [0.01196933 0.01197171 0.01200271 0.01200986 0.01196694 0.01200986
|
|
0.01199579 0.01200604 0.01202822 0.01213503]
|
|
|
|
mean value: 0.012009549140930175
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.81649658 1. 0.40824829 0.81649658 0.6
|
|
0.40824829 0.5 0.81649658 0.40824829]
|
|
|
|
mean value: 0.6590731195102493
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.9 1. 0.7 0.9 0.8 0.7 0.7 0.9 0.7]
|
|
|
|
mean value: 0.82
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.90909091 1. 0.72727273 0.88888889 0.8
|
|
0.72727273 0.76923077 0.88888889 0.66666667]
|
|
|
|
mean value: 0.8286402486402487
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.83333333 1. 0.66666667 1. 0.8
|
|
0.66666667 0.625 1. 0.75 ]
|
|
|
|
mean value: 0.8175
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.8 0.8 0.8 0.8 1. 0.8 0.6]
|
|
|
|
mean value: 0.86
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.9 1. 0.7 0.9 0.8 0.7 0.7 0.9 0.7]
|
|
|
|
mean value: 0.8200000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.83333333 1. 0.57142857 0.8 0.66666667
|
|
0.57142857 0.625 0.8 0.5 ]
|
|
|
|
mean value: 0.7201190476190477
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01629376 0.01320314 0.01022458 0.00981021 0.00979257 0.00974083
|
|
0.00949192 0.0102129 0.00986719 0.00993228]
|
|
|
|
mean value: 0.010856938362121583
|
|
|
|
key: score_time
|
|
value: [0.0115726 0.00897956 0.0086844 0.00850129 0.00838923 0.00844407
|
|
0.00838757 0.00920272 0.00845051 0.00858712]
|
|
|
|
mean value: 0.008919906616210938
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.81649658 0.81649658 0.65465367 0.81649658 0.81649658
|
|
0.81649658 0.81649658 0.6 0.81649658]
|
|
|
|
mean value: 0.7786626318129786
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.9 0.9 0.8 0.9 0.9 0.9 0.9 0.8 0.9]
|
|
|
|
mean value: 0.88
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.90909091 0.90909091 0.83333333 0.88888889 0.90909091
|
|
0.88888889 0.90909091 0.8 0.90909091]
|
|
|
|
mean value: 0.8865656565656566
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.83333333 0.83333333 0.71428571 1. 0.83333333
|
|
1. 0.83333333 0.8 0.83333333]
|
|
|
|
mean value: 0.8514285714285714
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 0.8 1. 0.8 1. 0.8 1. ]
|
|
|
|
mean value: 0.9400000000000001
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.9 0.9 0.8 0.9 0.9 0.9 0.9 0.8 0.9]
|
|
|
|
mean value: 0.88
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.83333333 0.83333333 0.71428571 0.8 0.83333333
|
|
0.8 0.83333333 0.66666667 0.83333333]
|
|
|
|
mean value: 0.7980952380952381
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.89
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0893023 0.08990002 0.08999562 0.0806818 0.08832645 0.08767629
|
|
0.08837485 0.08918786 0.0886693 0.0875802 ]
|
|
|
|
mean value: 0.08796947002410889
|
|
|
|
key: score_time
|
|
value: [0.01877093 0.01870561 0.01703525 0.01690221 0.01881409 0.01816916
|
|
0.01854229 0.01852489 0.01844764 0.01862383]
|
|
|
|
mean value: 0.018253588676452638
|
|
|
|
key: test_mcc
|
|
value: [0.40824829 0.81649658 0.81649658 0.21821789 0.6 0.81649658
|
|
0.65465367 0.81649658 0.81649658 0.5 ]
|
|
|
|
mean value: 0.6463602756046463
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.7 0.9 0.9 0.6 0.8 0.9 0.8 0.9 0.9 0.7]
|
|
|
|
mean value: 0.81
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.90909091 0.90909091 0.66666667 0.8 0.88888889
|
|
0.75 0.90909091 0.88888889 0.57142857]
|
|
|
|
mean value: 0.802041847041847
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.83333333 0.83333333 0.57142857 0.8 1.
|
|
1. 0.83333333 1. 1. ]
|
|
|
|
mean value: 0.8538095238095238
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8 1. 1. 0.8 0.8 0.8 0.6 1. 0.8 0.4]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 0.9 0.9 0.6 0.8 0.9 0.8 0.9 0.9 0.7]
|
|
|
|
mean value: 0.81
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.83333333 0.83333333 0.5 0.66666667 0.8
|
|
0.6 0.83333333 0.8 0.4 ]
|
|
|
|
mean value: 0.6838095238095239
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00986981 0.00918651 0.00949073 0.00941229 0.00946736 0.00948048
|
|
0.00914884 0.00917602 0.00864077 0.0092082 ]
|
|
|
|
mean value: 0.009308099746704102
|
|
|
|
key: score_time
|
|
value: [0.00936985 0.00940537 0.00926566 0.00926495 0.00862026 0.00878453
|
|
0.00922108 0.00850725 0.00934005 0.00925589]
|
|
|
|
mean value: 0.009103488922119141
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 0.5 1. 0.5 0.6 0.
|
|
0.81649658 0.5 0.6 0.5 ]
|
|
|
|
mean value: 0.5671150251635704
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.7 1. 0.7 0.8 0.5 0.9 0.7 0.8 0.7]
|
|
|
|
mean value: 0.76
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.76923077 1. 0.76923077 0.8 0.54545455
|
|
0.88888889 0.76923077 0.8 0.57142857]
|
|
|
|
mean value: 0.7746797646797647
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.625 1. 0.625 0.8 0.5
|
|
1. 0.625 0.8 1. ]
|
|
|
|
mean value: 0.7689285714285714
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 0.8 0.6 0.8 1. 0.8 0.4]
|
|
|
|
mean value: 0.84
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.7 1. 0.7 0.8 0.5 0.9 0.7 0.8 0.7]
|
|
|
|
mean value: 0.76
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.625 1. 0.625 0.66666667 0.375
|
|
0.8 0.625 0.66666667 0.4 ]
|
|
|
|
mean value: 0.6497619047619048
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.04
|
|
|
|
Accuracy on Blind test: 0.48
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.15932965 1.05107379 1.05955648 1.02203918 1.01323223 1.02794361
|
|
1.01532245 1.02316165 1.02073669 1.01626611]
|
|
|
|
mean value: 1.0408661842346192
|
|
|
|
key: score_time
|
|
value: [0.09322572 0.09367871 0.09359598 0.09217787 0.08568668 0.08590388
|
|
0.09312415 0.09303379 0.08970332 0.09149098]
|
|
|
|
mean value: 0.091162109375
|
|
|
|
key: test_mcc
|
|
value: [0.6 0.65465367 1. 0.21821789 0.81649658 1.
|
|
0.81649658 0.81649658 0.6 0.5 ]
|
|
|
|
mean value: 0.7022361303727148
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.8 1. 0.6 0.9 1. 0.9 0.9 0.8 0.7]
|
|
|
|
mean value: 0.84
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.83333333 1. 0.66666667 0.88888889 1.
|
|
0.88888889 0.90909091 0.8 0.57142857]
|
|
|
|
mean value: 0.8358297258297258
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.8 0.71428571 1. 0.57142857 1. 1.
|
|
1. 0.83333333 0.8 1. ]
|
|
|
|
mean value: 0.871904761904762
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8 1. 1. 0.8 0.8 1. 0.8 1. 0.8 0.4]
|
|
|
|
mean value: 0.8400000000000001
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.8 1. 0.6 0.9 1. 0.9 0.9 0.8 0.7]
|
|
|
|
mean value: 0.8400000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.71428571 1. 0.5 0.8 1.
|
|
0.8 0.83333333 0.66666667 0.4 ]
|
|
|
|
mean value: 0.7380952380952381
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.54
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.84170556 0.83023262 0.82111239 0.94291162 0.86192775 0.89486432
|
|
0.85397744 0.87075162 0.84488964 0.99061775]
|
|
|
|
mean value: 0.875299072265625
|
|
|
|
key: score_time
|
|
value: [0.19168687 0.20659614 0.23333478 0.17990088 0.21830344 0.18155169
|
|
0.20949388 0.21993876 0.20162749 0.21077156]
|
|
|
|
mean value: 0.20532054901123048
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.65465367 0.81649658 0.21821789 0.81649658 0.65465367
|
|
0.81649658 0.81649658 0.65465367 0.5 ]
|
|
|
|
mean value: 0.6764661806998554
|
|
|
|
key: train_mcc
|
|
value: [1. 0.97801929 0.97801929 1. 0.97801929 0.97801929
|
|
0.95650071 1. 0.95650071 1. ]
|
|
|
|
mean value: 0.9825078604565161
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.8 0.9 0.6 0.9 0.8 0.9 0.9 0.8 0.7]
|
|
|
|
mean value: 0.8200000000000001
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.98888889 0.98888889 1. 0.98888889 0.98888889
|
|
0.97777778 1. 0.97777778 1. ]
|
|
|
|
mean value: 0.9911111111111112
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 0.83333333 0.88888889 0.66666667 0.88888889 0.75
|
|
0.88888889 0.90909091 0.75 0.57142857]
|
|
|
|
mean value: 0.8036075036075037
|
|
|
|
key: train_fscore
|
|
value: [1. 0.98901099 0.98876404 1. 0.98876404 0.98876404
|
|
0.97727273 1. 0.97727273 1. ]
|
|
|
|
mean value: 0.9909848578387904
|
|
|
|
key: test_precision
|
|
value: [1. 0.71428571 1. 0.57142857 1. 1.
|
|
1. 0.83333333 1. 1. ]
|
|
|
|
mean value: 0.9119047619047619
|
|
|
|
key: train_precision
|
|
value: [1. 0.97826087 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9978260869565218
|
|
|
|
key: test_recall
|
|
value: [0.8 1. 0.8 0.8 0.8 0.6 0.8 1. 0.6 0.4]
|
|
|
|
mean value: 0.76
|
|
|
|
key: train_recall
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[1. 1. 0.97777778 1. 0.97777778 0.97777778
|
|
0.95555556 1. 0.95555556 1. ]
|
|
|
|
mean value: 0.9844444444444445
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.8 0.9 0.6 0.9 0.8 0.9 0.9 0.8 0.7]
|
|
|
|
mean value: 0.8200000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.98888889 0.98888889 1. 0.98888889 0.98888889
|
|
0.97777778 1. 0.97777778 1. ]
|
|
|
|
mean value: 0.9911111111111112
|
|
|
|
key: test_jcc
|
|
value: [0.8 0.71428571 0.8 0.5 0.8 0.6
|
|
0.8 0.83333333 0.6 0.4 ]
|
|
|
|
mean value: 0.6847619047619048
|
|
|
|
key: train_jcc
|
|
value: [1. 0.97826087 0.97777778 1. 0.97777778 0.97777778
|
|
0.95555556 1. 0.95555556 1. ]
|
|
|
|
mean value: 0.9822705314009662
|
|
|
|
MCC on Blind test: 0.6
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02315879 0.01028872 0.00969672 0.00906706 0.00864863 0.00874782
|
|
0.00876069 0.00871038 0.00945473 0.00973272]
|
|
|
|
mean value: 0.010626626014709473
|
|
|
|
key: score_time
|
|
value: [0.01252508 0.00953984 0.00998116 0.00864005 0.00861979 0.00862026
|
|
0.0085988 0.00850296 0.00862026 0.00932288]
|
|
|
|
mean value: 0.00929710865020752
|
|
|
|
key: test_mcc
|
|
value: [0.40824829 0.6 0. 0.21821789 0.40824829 0.65465367
|
|
0.6 0.65465367 0.5 0. ]
|
|
|
|
mean value: 0.4044021812579673
|
|
|
|
key: train_mcc
|
|
value: [0.68888889 0.58137767 0.60540551 0.66683134 0.64700558 0.55776344
|
|
0.69162666 0.58137767 0.71269665 0.70004007]
|
|
|
|
mean value: 0.6433013479547345
|
|
|
|
key: test_accuracy
|
|
value: [0.7 0.8 0.5 0.6 0.7 0.8 0.8 0.8 0.7 0.5]
|
|
|
|
mean value: 0.69
|
|
|
|
key: train_accuracy
|
|
value: [0.84444444 0.78888889 0.8 0.83333333 0.82222222 0.77777778
|
|
0.84444444 0.78888889 0.85555556 0.84444444]
|
|
|
|
mean value: 0.82
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.8 0.44444444 0.66666667 0.66666667 0.75
|
|
0.8 0.83333333 0.57142857 0.28571429]
|
|
|
|
mean value: 0.6545526695526696
|
|
|
|
key: train_fscore
|
|
value: [0.84444444 0.77647059 0.78571429 0.83146067 0.81395349 0.76744186
|
|
0.8372093 0.77647059 0.85057471 0.82926829]
|
|
|
|
mean value: 0.8113008237276017
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.8 0.5 0.57142857 0.75 1.
|
|
0.8 0.71428571 1. 0.5 ]
|
|
|
|
mean value: 0.7302380952380952
|
|
|
|
key: train_precision
|
|
value: [0.84444444 0.825 0.84615385 0.84090909 0.85365854 0.80487805
|
|
0.87804878 0.825 0.88095238 0.91891892]
|
|
|
|
mean value: 0.851796404723234
|
|
|
|
key: test_recall
|
|
value: [0.8 0.8 0.4 0.8 0.6 0.6 0.8 1. 0.4 0.2]
|
|
|
|
mean value: 0.64
|
|
|
|
key: train_recall
|
|
value: [0.84444444 0.73333333 0.73333333 0.82222222 0.77777778 0.73333333
|
|
0.8 0.73333333 0.82222222 0.75555556]
|
|
|
|
mean value: 0.7755555555555556
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 0.8 0.5 0.6 0.7 0.8 0.8 0.8 0.7 0.5]
|
|
|
|
mean value: 0.6900000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [0.84444444 0.78888889 0.8 0.83333333 0.82222222 0.77777778
|
|
0.84444444 0.78888889 0.85555556 0.84444444]
|
|
|
|
mean value: 0.82
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.66666667 0.28571429 0.5 0.5 0.6
|
|
0.66666667 0.71428571 0.4 0.16666667]
|
|
|
|
mean value: 0.5071428571428571
|
|
|
|
key: train_jcc
|
|
value: [0.73076923 0.63461538 0.64705882 0.71153846 0.68627451 0.62264151
|
|
0.72 0.63461538 0.74 0.70833333]
|
|
|
|
mean value: 0.683584663763909
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.0782187 0.035676 0.03869748 0.19958353 0.03675485 0.04161453
|
|
0.23583102 0.50171185 0.18434334 0.07802296]
|
|
|
|
mean value: 0.14304542541503906
|
|
|
|
key: score_time
|
|
value: [0.01095033 0.01015282 0.01051188 0.01092458 0.01066589 0.01025844
|
|
0.01311946 0.01248908 0.01308417 0.01076651]
|
|
|
|
mean value: 0.011292314529418946
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 1. 0.81649658 0.81649658 0.81649658 1.
|
|
0.81649658 0.81649658 1. 1. ]
|
|
|
|
mean value: 0.8737136575346607
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 1. 0.9 0.9 0.9 1. 0.9 0.9 1. 1. ]
|
|
|
|
mean value: 0.93
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 1. 0.90909091 0.90909091 0.88888889 1.
|
|
0.88888889 0.90909091 1. 1. ]
|
|
|
|
mean value: 0.9338383838383838
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.71428571 1. 0.83333333 0.83333333 1. 1.
|
|
1. 0.83333333 1. 1. ]
|
|
|
|
mean value: 0.9214285714285715
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 0.8 1. 0.8 1. 1. 1. ]
|
|
|
|
mean value: 0.96
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 1. 0.9 0.9 0.9 1. 0.9 0.9 1. 1. ]
|
|
|
|
mean value: 0.93
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 1. 0.83333333 0.83333333 0.8 1.
|
|
0.8 0.83333333 1. 1. ]
|
|
|
|
mean value: 0.8814285714285715
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.89
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02349949 0.04713821 0.06608343 0.06277347 0.04959702 0.0388329
|
|
0.03968954 0.053617 0.0484674 0.04304004]
|
|
|
|
mean value: 0.047273850440979
|
|
|
|
key: score_time
|
|
value: [0.02380705 0.02124381 0.0237565 0.03547454 0.02019763 0.02404904
|
|
0.02267098 0.02003694 0.0200119 0.02282238]
|
|
|
|
mean value: 0.02340707778930664
|
|
|
|
key: test_mcc
|
|
value: [0.33333333 0.81649658 0.5 0. 0.21821789 0.81649658
|
|
0.81649658 0.33333333 0.81649658 0.21821789]
|
|
|
|
mean value: 0.48690887708495556
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 0.97801929 1.
|
|
1. 1. 0.97801929 0.97801929]
|
|
|
|
mean value: 0.9934057881530954
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.9 0.7 0.5 0.6 0.9 0.9 0.6 0.9 0.6]
|
|
|
|
mean value: 0.72
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 0.98888889 1.
|
|
1. 1. 0.98888889 0.98888889]
|
|
|
|
mean value: 0.9966666666666667
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.90909091 0.76923077 0.61538462 0.66666667 0.90909091
|
|
0.90909091 0.71428571 0.88888889 0.5 ]
|
|
|
|
mean value: 0.7596015096015096
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 0.98901099 1.
|
|
1. 1. 0.98876404 0.98876404]
|
|
|
|
mean value: 0.996653907889863
|
|
|
|
key: test_precision
|
|
value: [0.55555556 0.83333333 0.625 0.5 0.57142857 0.83333333
|
|
0.83333333 0.55555556 1. 0.66666667]
|
|
|
|
mean value: 0.6974206349206349
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 0.97826087 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9978260869565218
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.8 0.8 1. 1. 1. 0.8 0.4]
|
|
|
|
mean value: 0.88
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 0.97777778 0.97777778]
|
|
|
|
mean value: 0.9955555555555555
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 0.9 0.7 0.5 0.6 0.9 0.9 0.6 0.9 0.6]
|
|
|
|
mean value: 0.72
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 0.98888889 1.
|
|
1. 1. 0.98888889 0.98888889]
|
|
|
|
mean value: 0.9966666666666667
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.83333333 0.625 0.44444444 0.5 0.83333333
|
|
0.83333333 0.55555556 0.8 0.33333333]
|
|
|
|
mean value: 0.6313888888888889
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 0.97826087 1.
|
|
1. 1. 0.97777778 0.97777778]
|
|
|
|
mean value: 0.9933816425120773
|
|
|
|
MCC on Blind test: 0.01
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01617742 0.00876117 0.00850463 0.0087924 0.00857806 0.00921059
|
|
0.00938845 0.0089817 0.00929332 0.00935221]
|
|
|
|
mean value: 0.009703993797302246
|
|
|
|
key: score_time
|
|
value: [0.00881886 0.0085144 0.00851989 0.00928068 0.00837755 0.00896335
|
|
0.00911069 0.00911427 0.00912476 0.00914335]
|
|
|
|
mean value: 0.008896780014038087
|
|
|
|
key: test_mcc
|
|
value: [0.40824829 0.40824829 0.21821789 0.40824829 0.40824829 0.65465367
|
|
0. 0.6 0.2 0.21821789]
|
|
|
|
mean value: 0.3524082613035414
|
|
|
|
key: train_mcc
|
|
value: [0.62237591 0.62237591 0.58137767 0.60238451 0.56056066 0.60540551
|
|
0.56056066 0.55610507 0.56056066 0.55610507]
|
|
|
|
mean value: 0.5827811645657781
|
|
|
|
key: test_accuracy
|
|
value: [0.7 0.7 0.6 0.7 0.7 0.8 0.5 0.8 0.6 0.6]
|
|
|
|
mean value: 0.6699999999999999
|
|
|
|
key: train_accuracy
|
|
value: [0.81111111 0.81111111 0.78888889 0.8 0.77777778 0.8
|
|
0.77777778 0.77777778 0.77777778 0.77777778]
|
|
|
|
mean value: 0.79
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.72727273 0.5 0.72727273 0.72727273 0.75
|
|
0.54545455 0.8 0.6 0.5 ]
|
|
|
|
mean value: 0.6604545454545454
|
|
|
|
key: train_fscore
|
|
value: [0.80898876 0.81318681 0.77647059 0.80851064 0.76190476 0.78571429
|
|
0.76190476 0.77272727 0.76190476 0.77272727]
|
|
|
|
mean value: 0.782403992064804
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.66666667 0.66666667 0.66666667 0.66666667 1.
|
|
0.5 0.8 0.6 0.66666667]
|
|
|
|
mean value: 0.69
|
|
|
|
key: train_precision
|
|
value: [0.81818182 0.80434783 0.825 0.7755102 0.82051282 0.84615385
|
|
0.82051282 0.79069767 0.82051282 0.79069767]
|
|
|
|
mean value: 0.8112127504879925
|
|
|
|
key: test_recall
|
|
value: [0.8 0.8 0.4 0.8 0.8 0.6 0.6 0.8 0.6 0.4]
|
|
|
|
mean value: 0.66
|
|
|
|
key: train_recall
|
|
value: [0.8 0.82222222 0.73333333 0.84444444 0.71111111 0.73333333
|
|
0.71111111 0.75555556 0.71111111 0.75555556]
|
|
|
|
mean value: 0.7577777777777778
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 0.7 0.6 0.7 0.7 0.8 0.5 0.8 0.6 0.6]
|
|
|
|
mean value: 0.67
|
|
|
|
key: train_roc_auc
|
|
value: [0.81111111 0.81111111 0.78888889 0.8 0.77777778 0.8
|
|
0.77777778 0.77777778 0.77777778 0.77777778]
|
|
|
|
mean value: 0.79
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.57142857 0.33333333 0.57142857 0.57142857 0.6
|
|
0.375 0.66666667 0.42857143 0.33333333]
|
|
|
|
mean value: 0.5022619047619047
|
|
|
|
key: train_jcc
|
|
value: [0.67924528 0.68518519 0.63461538 0.67857143 0.61538462 0.64705882
|
|
0.61538462 0.62962963 0.61538462 0.62962963]
|
|
|
|
mean value: 0.6430089210333384
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01009655 0.01374936 0.0128026 0.01307821 0.01348066 0.01305318
|
|
0.01311874 0.01353908 0.01283336 0.01359773]
|
|
|
|
mean value: 0.01293494701385498
|
|
|
|
key: score_time
|
|
value: [0.0084734 0.01106596 0.01120782 0.0111711 0.0113287 0.01123643
|
|
0.01121616 0.01123714 0.01122808 0.01128221]
|
|
|
|
mean value: 0.010944700241088868
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 0.81649658 0.81649658 0.21821789 0.81649658 0.5
|
|
0.65465367 0.65465367 0.81649658 0.40824829]
|
|
|
|
mean value: 0.6356413516534691
|
|
|
|
key: train_mcc
|
|
value: [0.93541435 0.93541435 0.91473203 0.97801929 0.97801929 0.89442719
|
|
0.91201231 0.93541435 0.97801929 0.95650071]
|
|
|
|
mean value: 0.9417973170390846
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.9 0.9 0.6 0.9 0.7 0.8 0.8 0.9 0.7]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_accuracy
|
|
value: [0.96666667 0.96666667 0.95555556 0.98888889 0.98888889 0.94444444
|
|
0.95555556 0.96666667 0.98888889 0.97777778]
|
|
|
|
mean value: 0.97
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.90909091 0.90909091 0.66666667 0.88888889 0.57142857
|
|
0.83333333 0.83333333 0.88888889 0.66666667]
|
|
|
|
mean value: 0.80007215007215
|
|
|
|
key: train_fscore
|
|
value: [0.96774194 0.96774194 0.95744681 0.98901099 0.98901099 0.94117647
|
|
0.95652174 0.96774194 0.98901099 0.97727273]
|
|
|
|
mean value: 0.9702676518986616
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.83333333 0.83333333 0.57142857 1. 1.
|
|
0.71428571 0.71428571 1. 0.75 ]
|
|
|
|
mean value: 0.8130952380952381
|
|
|
|
key: train_precision
|
|
value: [0.9375 0.9375 0.91836735 0.97826087 0.97826087 1.
|
|
0.93617021 0.9375 0.97826087 1. ]
|
|
|
|
mean value: 0.9601820168400386
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.8 0.8 0.4 1. 1. 0.8 0.6]
|
|
|
|
mean value: 0.84
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 0.88888889
|
|
0.97777778 1. 1. 0.95555556]
|
|
|
|
mean value: 0.9822222222222222
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.9 0.9 0.6 0.9 0.7 0.8 0.8 0.9 0.7]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_roc_auc
|
|
value: [0.96666667 0.96666667 0.95555556 0.98888889 0.98888889 0.94444444
|
|
0.95555556 0.96666667 0.98888889 0.97777778]
|
|
|
|
mean value: 0.97
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.83333333 0.83333333 0.5 0.8 0.4
|
|
0.71428571 0.71428571 0.8 0.5 ]
|
|
|
|
mean value: 0.680952380952381
|
|
|
|
key: train_jcc
|
|
value: [0.9375 0.9375 0.91836735 0.97826087 0.97826087 0.88888889
|
|
0.91666667 0.9375 0.97826087 0.95555556]
|
|
|
|
mean value: 0.9426761066745539
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01266241 0.01253319 0.01259041 0.0122745 0.01273322 0.01224399
|
|
0.01242423 0.01244211 0.01222348 0.01222324]
|
|
|
|
mean value: 0.012435078620910645
|
|
|
|
key: score_time
|
|
value: [0.01101017 0.01124358 0.01118398 0.01122379 0.01121879 0.01122594
|
|
0.01122475 0.01121068 0.01118016 0.01124597]
|
|
|
|
mean value: 0.011196780204772949
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 0.81649658 0.81649658 0.21821789 0.40824829 0.65465367
|
|
0.81649658 0.65465367 0.81649658 0.81649658]
|
|
|
|
mean value: 0.6672910097462417
|
|
|
|
key: train_mcc
|
|
value: [0.93356387 0.97801929 0.95650071 1. 1. 0.88910845
|
|
0.93356387 0.97801929 0.97801929 0.97801929]
|
|
|
|
mean value: 0.9624814081711083
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.9 0.9 0.6 0.7 0.8 0.9 0.8 0.9 0.9]
|
|
|
|
mean value: 0.8200000000000001
|
|
|
|
key: train_accuracy
|
|
value: [0.96666667 0.98888889 0.97777778 1. 1. 0.94444444
|
|
0.96666667 0.98888889 0.98888889 0.98888889]
|
|
|
|
mean value: 0.9811111111111112
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.90909091 0.90909091 0.66666667 0.72727273 0.75
|
|
0.90909091 0.83333333 0.88888889 0.90909091]
|
|
|
|
mean value: 0.8335858585858585
|
|
|
|
key: train_fscore
|
|
value: [0.96629213 0.98876404 0.97727273 1. 1. 0.94382022
|
|
0.96703297 0.98876404 0.98901099 0.98901099]
|
|
|
|
mean value: 0.9809968121765875
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.83333333 0.83333333 0.57142857 0.66666667 1.
|
|
0.83333333 0.71428571 1. 0.83333333]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_precision
|
|
value: [0.97727273 1. 1. 1. 1. 0.95454545
|
|
0.95652174 1. 0.97826087 0.97826087]
|
|
|
|
mean value: 0.9844861660079052
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.8 0.8 0.6 1. 1. 0.8 1. ]
|
|
|
|
mean value: 0.9
|
|
|
|
key: train_recall
|
|
value: [0.95555556 0.97777778 0.95555556 1. 1. 0.93333333
|
|
0.97777778 0.97777778 1. 1. ]
|
|
|
|
mean value: 0.9777777777777777
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.9 0.9 0.6 0.7 0.8 0.9 0.8 0.9 0.9]
|
|
|
|
mean value: 0.8200000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [0.96666667 0.98888889 0.97777778 1. 1. 0.94444444
|
|
0.96666667 0.98888889 0.98888889 0.98888889]
|
|
|
|
mean value: 0.9811111111111112
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.83333333 0.83333333 0.5 0.57142857 0.6
|
|
0.83333333 0.71428571 0.8 0.83333333]
|
|
|
|
mean value: 0.7233333333333334
|
|
|
|
key: train_jcc
|
|
value: [0.93478261 0.97777778 0.95555556 1. 1. 0.89361702
|
|
0.93617021 0.97777778 0.97826087 0.97826087]
|
|
|
|
mean value: 0.9632202692979751
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.57
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09050202 0.08575082 0.08697701 0.08462596 0.08410382 0.0856297
|
|
0.08490443 0.08724713 0.08709764 0.08831048]
|
|
|
|
mean value: 0.08651490211486816
|
|
|
|
key: score_time
|
|
value: [0.01545334 0.01579857 0.01585507 0.01584077 0.01450062 0.01623821
|
|
0.01575541 0.01573849 0.01574063 0.01583362]
|
|
|
|
mean value: 0.0156754732131958
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.81649658 0.81649658 0.65465367 0.81649658 1.
|
|
0.81649658 0.81649658 0.81649658 0.81649658]
|
|
|
|
mean value: 0.8186626318129786
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.9 0.9 0.8 0.9 1. 0.9 0.9 0.9 0.9]
|
|
|
|
mean value: 0.9
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.90909091 0.90909091 0.83333333 0.90909091 1.
|
|
0.88888889 0.90909091 0.88888889 0.90909091]
|
|
|
|
mean value: 0.9065656565656566
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.83333333 0.83333333 0.71428571 0.83333333 1.
|
|
1. 0.83333333 1. 0.83333333]
|
|
|
|
mean value: 0.8714285714285714
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 0.8 1. 0.8 1. ]
|
|
|
|
mean value: 0.96
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.9 0.9 0.8 0.9 1. 0.9 0.9 0.9 0.9]
|
|
|
|
mean value: 0.9
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.83333333 0.83333333 0.71428571 0.83333333 1.
|
|
0.8 0.83333333 0.8 0.83333333]
|
|
|
|
mean value: 0.8314285714285714
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.78
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03332639 0.03159523 0.0316155 0.02976799 0.02722383 0.02606964
|
|
0.02668548 0.0271318 0.03500819 0.02735019]
|
|
|
|
mean value: 0.029577422142028808
|
|
|
|
key: score_time
|
|
value: [0.01860285 0.01750827 0.02369905 0.02393627 0.01593733 0.01683497
|
|
0.01794434 0.0177505 0.02326179 0.02694941]
|
|
|
|
mean value: 0.020242476463317872
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 1. 0.81649658 0.65465367 0.81649658 1.
|
|
0.81649658 0.81649658 0.81649658 0.81649658]
|
|
|
|
mean value: 0.8208286826982311
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 0.97801929 1. 1. 0.97801929
|
|
1. 0.97801929 1. 1. ]
|
|
|
|
mean value: 0.9934057881530954
|
|
|
|
key: test_accuracy
|
|
value: [0.8 1. 0.9 0.8 0.9 1. 0.9 0.9 0.9 0.9]
|
|
|
|
mean value: 0.9
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 0.98888889 1. 1. 0.98888889
|
|
1. 0.98888889 1. 1. ]
|
|
|
|
mean value: 0.9966666666666667
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 1. 0.90909091 0.83333333 0.88888889 1.
|
|
0.88888889 0.90909091 0.88888889 0.90909091]
|
|
|
|
mean value: 0.906060606060606
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 0.98901099 1. 1. 0.98901099
|
|
1. 0.98901099 1. 1. ]
|
|
|
|
mean value: 0.9967032967032967
|
|
|
|
key: test_precision
|
|
value: [0.71428571 1. 0.83333333 0.71428571 1. 1.
|
|
1. 0.83333333 1. 0.83333333]
|
|
|
|
mean value: 0.8928571428571429
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.97826087 1. 1. 0.97826087
|
|
1. 0.97826087 1. 1. ]
|
|
|
|
mean value: 0.9934782608695653
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 0.8 1. 0.8 1. 0.8 1. ]
|
|
|
|
mean value: 0.9400000000000001
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 1. 0.9 0.8 0.9 1. 0.9 0.9 0.9 0.9]
|
|
|
|
mean value: 0.9
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 0.98888889 1. 1. 0.98888889
|
|
1. 0.98888889 1. 1. ]
|
|
|
|
mean value: 0.9966666666666666
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 1. 0.83333333 0.71428571 0.8 1.
|
|
0.8 0.83333333 0.8 0.83333333]
|
|
|
|
mean value: 0.8328571428571429
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 0.97826087 1. 1. 0.97826087
|
|
1. 0.97826087 1. 1. ]
|
|
|
|
mean value: 0.9934782608695653
|
|
|
|
MCC on Blind test: 0.95
|
|
|
|
Accuracy on Blind test: 0.98
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01528788 0.01607227 0.02439117 0.01962614 0.01726866 0.01696539
|
|
0.01694345 0.01618481 0.01628375 0.01687813]
|
|
|
|
mean value: 0.01759016513824463
|
|
|
|
key: score_time
|
|
value: [0.01140237 0.01114202 0.01187825 0.0118506 0.01176071 0.0119977
|
|
0.01188946 0.01196241 0.01185942 0.01186037]
|
|
|
|
mean value: 0.011760330200195313
|
|
|
|
key: test_mcc
|
|
value: [0.6 0.65465367 0.81649658 0.21821789 0.6 0.81649658
|
|
0.40824829 0.5 0.40824829 0.40824829]
|
|
|
|
mean value: 0.543060959419101
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.8 0.9 0.6 0.8 0.9 0.7 0.7 0.7 0.7]
|
|
|
|
mean value: 0.76
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.83333333 0.90909091 0.66666667 0.8 0.88888889
|
|
0.66666667 0.76923077 0.72727273 0.66666667]
|
|
|
|
mean value: 0.7727816627816628
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.8 0.71428571 0.83333333 0.57142857 0.8 1.
|
|
0.75 0.625 0.66666667 0.75 ]
|
|
|
|
mean value: 0.7510714285714286
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8 1. 1. 0.8 0.8 0.8 0.6 1. 0.8 0.6]
|
|
|
|
mean value: 0.8200000000000001
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.8 0.9 0.6 0.8 0.9 0.7 0.7 0.7 0.7]
|
|
|
|
mean value: 0.76
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.71428571 0.83333333 0.5 0.66666667 0.8
|
|
0.5 0.625 0.57142857 0.5 ]
|
|
|
|
mean value: 0.6377380952380952
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.17597413 0.1721468 0.17924476 0.17421913 0.17393565 0.17166734
|
|
0.14058495 0.17408228 0.17574048 0.17127013]
|
|
|
|
mean value: 0.17088656425476073
|
|
|
|
key: score_time
|
|
value: [0.00916481 0.00947452 0.00913882 0.00930166 0.00918031 0.00899935
|
|
0.0091269 0.00965333 0.00991249 0.00923991]
|
|
|
|
mean value: 0.009319210052490234
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 1. 0.81649658 1. 0.6 1.
|
|
0.81649658 0.81649658 0.81649658 0.81649658]
|
|
|
|
mean value: 0.8498979485566356
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 1. 0.9 1. 0.8 1. 0.9 0.9 0.9 0.9]
|
|
|
|
mean value: 0.92
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 1. 0.90909091 1. 0.8 1.
|
|
0.88888889 0.90909091 0.88888889 0.90909091]
|
|
|
|
mean value: 0.9214141414141415
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 1. 0.83333333 1. 0.8 1.
|
|
1. 0.83333333 1. 0.83333333]
|
|
|
|
mean value: 0.9133333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 0.8 1. 0.8 1. 0.8 1. ]
|
|
|
|
mean value: 0.9400000000000001
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 1. 0.9 1. 0.8 1. 0.9 0.9 0.9 0.9]
|
|
|
|
mean value: 0.92
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 1. 0.83333333 1. 0.66666667 1.
|
|
0.8 0.83333333 0.8 0.83333333]
|
|
|
|
mean value: 0.86
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.95
|
|
|
|
Accuracy on Blind test: 0.98
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01125002 0.01427794 0.01427984 0.01602817 0.01415706 0.01421785
|
|
0.0143075 0.01926303 0.01412845 0.01442242]
|
|
|
|
mean value: 0.01463322639465332
|
|
|
|
key: score_time
|
|
value: [0.01145935 0.01170135 0.01169324 0.01170659 0.01228118 0.01166224
|
|
0.01343036 0.01290202 0.01216722 0.01210403]
|
|
|
|
mean value: 0.012110757827758788
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 1. 1. 0.81649658 0.81649658 0.65465367
|
|
0.5 0.81649658 0.65465367 0.33333333]
|
|
|
|
mean value: 0.7408626998460192
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 1. 1. 0.9 0.9 0.8 0.7 0.9 0.8 0.6]
|
|
|
|
mean value: 0.85
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 1. 1. 0.88888889 0.88888889 0.75
|
|
0.57142857 0.88888889 0.75 0.33333333]
|
|
|
|
mean value: 0.7960317460317461
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8 1. 1. 0.8 0.8 0.6 0.4 0.8 0.6 0.2]
|
|
|
|
mean value: 0.7000000000000001
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 1. 1. 0.9 0.9 0.8 0.7 0.9 0.8 0.6]
|
|
|
|
mean value: 0.85
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.8 1. 1. 0.8 0.8 0.6 0.4 0.8 0.6 0.2]
|
|
|
|
mean value: 0.7000000000000001
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03190494 0.0162518 0.01298714 0.02096915 0.02571321 0.07055092
|
|
0.01573253 0.01286387 0.01908612 0.01534081]
|
|
|
|
mean value: 0.024140048027038574
|
|
|
|
key: score_time
|
|
value: [0.0120213 0.01187372 0.01150703 0.01177883 0.0234704 0.01707196
|
|
0.01153183 0.01159644 0.01922894 0.01779103]
|
|
|
|
mean value: 0.01478714942932129
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.81649658 0.81649658 0.40824829 0.6 0.65465367
|
|
0.65465367 0.81649658 0.81649658 0.81649658]
|
|
|
|
mean value: 0.7216535117446173
|
|
|
|
key: train_mcc
|
|
value: [0.97801929 0.97801929 0.97801929 1. 1. 0.97801929
|
|
0.95555556 0.97801929 1. 0.97801929]
|
|
|
|
mean value: 0.9823671318617464
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.9 0.9 0.7 0.8 0.8 0.8 0.9 0.9 0.9]
|
|
|
|
mean value: 0.85
|
|
|
|
key: train_accuracy
|
|
value: [0.98888889 0.98888889 0.98888889 1. 1. 0.98888889
|
|
0.97777778 0.98888889 1. 0.98888889]
|
|
|
|
mean value: 0.9911111111111112
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.90909091 0.90909091 0.72727273 0.8 0.83333333
|
|
0.83333333 0.90909091 0.88888889 0.90909091]
|
|
|
|
mean value: 0.8628282828282828
|
|
|
|
key: train_fscore
|
|
value: [0.98901099 0.98901099 0.98901099 1. 1. 0.98901099
|
|
0.97777778 0.98901099 1. 0.98901099]
|
|
|
|
mean value: 0.9911843711843712
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.83333333 0.83333333 0.66666667 0.8 0.71428571
|
|
0.71428571 0.83333333 1. 0.83333333]
|
|
|
|
mean value: 0.8061904761904762
|
|
|
|
key: train_precision
|
|
value: [0.97826087 0.97826087 0.97826087 1. 1. 0.97826087
|
|
0.97777778 0.97826087 1. 0.97826087]
|
|
|
|
mean value: 0.9847342995169082
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.8 0.8 1. 1. 1. 0.8 1. ]
|
|
|
|
mean value: 0.9400000000000001
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.97777778 1. 1. 1. ]
|
|
|
|
mean value: 0.9977777777777778
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.9 0.9 0.7 0.8 0.8 0.8 0.9 0.9 0.9]
|
|
|
|
mean value: 0.8500000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [0.98888889 0.98888889 0.98888889 1. 1. 0.98888889
|
|
0.97777778 0.98888889 1. 0.98888889]
|
|
|
|
mean value: 0.991111111111111
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.83333333 0.83333333 0.57142857 0.66666667 0.71428571
|
|
0.71428571 0.83333333 0.8 0.83333333]
|
|
|
|
mean value: 0.7633333333333333
|
|
|
|
key: train_jcc
|
|
value: [0.97826087 0.97826087 0.97826087 1. 1. 0.97826087
|
|
0.95652174 0.97826087 1. 0.97826087]
|
|
|
|
mean value: 0.9826086956521739
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:188: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:191: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.26709867 0.15390754 0.17872667 0.26606345 0.15658689 0.17817855
|
|
0.16672301 0.15290213 0.25758982 0.18749452]
|
|
|
|
mean value: 0.1965271234512329
|
|
|
|
key: score_time
|
|
value: [0.0125277 0.01932573 0.02402663 0.02339983 0.02723026 0.01232576
|
|
0.02062273 0.02069354 0.02367878 0.02091861]
|
|
|
|
mean value: 0.020474958419799804
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.81649658 0.81649658 0.40824829 0.40824829 0.65465367
|
|
0.65465367 0.81649658 0.81649658 0.65465367]
|
|
|
|
mean value: 0.6862940497690287
|
|
|
|
key: train_mcc
|
|
value: [0.97801929 0.97801929 0.97801929 1. 1. 0.97801929
|
|
0.95555556 0.97801929 1. 1. ]
|
|
|
|
mean value: 0.9845652024773813
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.9 0.9 0.7 0.7 0.8 0.8 0.9 0.9 0.8]
|
|
|
|
mean value: 0.8300000000000001
|
|
|
|
key: train_accuracy
|
|
value: [0.98888889 0.98888889 0.98888889 1. 1. 0.98888889
|
|
0.97777778 0.98888889 1. 1. ]
|
|
|
|
mean value: 0.9922222222222222
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.90909091 0.90909091 0.72727273 0.72727273 0.83333333
|
|
0.83333333 0.90909091 0.88888889 0.83333333]
|
|
|
|
mean value: 0.847979797979798
|
|
|
|
key: train_fscore
|
|
value: [0.98901099 0.98901099 0.98901099 1. 1. 0.98901099
|
|
0.97777778 0.98901099 1. 1. ]
|
|
|
|
mean value: 0.9922832722832723
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.83333333 0.83333333 0.66666667 0.66666667 0.71428571
|
|
0.71428571 0.83333333 1. 0.71428571]
|
|
|
|
mean value: 0.780952380952381
|
|
|
|
key: train_precision
|
|
value: [0.97826087 0.97826087 0.97826087 1. 1. 0.97826087
|
|
0.97777778 0.97826087 1. 1. ]
|
|
|
|
mean value: 0.9869082125603865
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.8 0.8 1. 1. 1. 0.8 1. ]
|
|
|
|
mean value: 0.9400000000000001
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.97777778 1. 1. 1. ]
|
|
|
|
mean value: 0.9977777777777778
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.9 0.9 0.7 0.7 0.8 0.8 0.9 0.9 0.8]
|
|
|
|
mean value: 0.8300000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [0.98888889 0.98888889 0.98888889 1. 1. 0.98888889
|
|
0.97777778 0.98888889 1. 1. ]
|
|
|
|
mean value: 0.9922222222222222
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.83333333 0.83333333 0.57142857 0.57142857 0.71428571
|
|
0.71428571 0.83333333 0.8 0.71428571]
|
|
|
|
mean value: 0.741904761904762
|
|
|
|
key: train_jcc
|
|
value: [0.97826087 0.97826087 0.97826087 1. 1. 0.97826087
|
|
0.95652174 0.97826087 1. 1. ]
|
|
|
|
mean value: 0.9847826086956522
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.78
|