18915 lines
904 KiB
Text
18915 lines
904 KiB
Text
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_8020.py:549: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
|
|
from pandas import MultiIndex, Int64Index
|
|
1.22.4
|
|
1.4.1
|
|
|
|
aaindex_df contains non-numerical data
|
|
|
|
Total no. of non-numerial columns: 2
|
|
|
|
Selecting numerical data only
|
|
|
|
PASS: successfully selected numerical columns only for aaindex_df
|
|
|
|
Now checking for NA in the remaining aaindex_cols
|
|
|
|
Counting aaindex_df cols with NA
|
|
ncols with NA: 4 columns
|
|
Dropping these...
|
|
Original ncols: 127
|
|
|
|
Revised df ncols: 123
|
|
|
|
Checking NA in revised df...
|
|
|
|
PASS: cols with NA successfully dropped from aaindex_df
|
|
Proceeding with combining aa_df with other features_df
|
|
|
|
PASS: ncols match
|
|
Expected ncols: 123
|
|
Got: 123
|
|
|
|
Total no. of columns in clean aa_df: 123
|
|
|
|
Proceeding to merge, expected nrows in merged_df: 531
|
|
|
|
PASS: my_features_df and aa_df successfully combined
|
|
nrows: 531
|
|
ncols: 286
|
|
count of NULL values before imputation
|
|
|
|
or_mychisq 263
|
|
log10_or_mychisq 263
|
|
dtype: int64
|
|
count of NULL values AFTER imputation
|
|
|
|
mutationinformation 0
|
|
or_rawI 0
|
|
logorI 0
|
|
dtype: int64
|
|
|
|
PASS: OR values imputed, data ready for ML
|
|
|
|
Total no. of features for aaindex: 123
|
|
|
|
No. of numerical features: 167
|
|
No. of categorical features: 7
|
|
|
|
PASS: x_features has no target variable
|
|
|
|
No. of columns for x_features: 174
|
|
|
|
-------------------------------------------------------------
|
|
Successfully split data with stratification: 80/20
|
|
Train data size: (95, 174)
|
|
Test data size: (24, 174)
|
|
y_train numbers: Counter({0: 61, 1: 34})
|
|
y_train ratio: 1.7941176470588236
|
|
|
|
y_test_numbers: Counter({0: 15, 1: 9})
|
|
y_test ratio: 1.6666666666666667
|
|
-------------------------------------------------------------
|
|
|
|
Simple Random OverSampling
|
|
Counter({1: 61, 0: 61})
|
|
(122, 174)
|
|
|
|
Simple Random UnderSampling
|
|
Counter({0: 34, 1: 34})
|
|
(68, 174)
|
|
|
|
Simple Combined Over and UnderSampling
|
|
Counter({0: 61, 1: 61})
|
|
(122, 174)
|
|
|
|
SMOTE_NC OverSampling
|
|
Counter({1: 61, 0: 61})
|
|
(122, 174)
|
|
|
|
#####################################################################
|
|
|
|
Running ML analysis: 80/20 split
|
|
Gene name: gid
|
|
Drug name: streptomycin
|
|
|
|
Output directory: /home/tanu/git/Data/streptomycin/output/ml/tts_8020/
|
|
Sanity checks:
|
|
ML source data size: (119, 174)
|
|
Total input features: (95, 174)
|
|
Target feature numbers: Counter({0: 61, 1: 34})
|
|
Target features ratio: 1.7941176470588236
|
|
|
|
#####################################################################
|
|
|
|
|
|
================================================================
|
|
|
|
Strucutral features (n): 35
|
|
These are:
|
|
Common stablity features: ['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'mcsm_na_affinity']
|
|
FoldX columns: ['electro_rr', 'electro_mm', 'electro_sm', 'electro_ss', 'disulfide_rr', 'disulfide_mm', 'disulfide_sm', 'disulfide_ss', 'hbonds_rr', 'hbonds_mm', 'hbonds_sm', 'hbonds_ss', 'partcov_rr', 'partcov_mm', 'partcov_sm', 'partcov_ss', 'vdwclashes_rr', 'vdwclashes_mm', 'vdwclashes_sm', 'vdwclashes_ss', 'volumetric_rr', 'volumetric_mm', 'volumetric_ss']
|
|
Other struc columns: ['rsa', 'kd_values', 'rd_values']
|
|
================================================================
|
|
|
|
AAindex features (n): 123
|
|
================================================================
|
|
|
|
Evolutionary features (n): 3
|
|
These are:
|
|
['consurf_score', 'snap2_score', 'provean_score']
|
|
================================================================
|
|
|
|
Genomic features (n): 6
|
|
These are:
|
|
['maf', 'logorI']
|
|
['lineage_proportion', 'dist_lineage_proportion', 'lineage_count_all', 'lineage_count_unique']
|
|
================================================================
|
|
|
|
Categorical features (n): 7
|
|
These are:
|
|
['ss_class', 'aa_prop_change', 'electrostatics_change', 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site']
|
|
================================================================
|
|
|
|
|
|
Pass: No. of features match
|
|
|
|
#####################################################################
|
|
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0305469 0.02698874 0.03995991 0.03127837 0.02630901 0.02897191
|
|
0.0300622 0.03244019 0.03317738 0.04694223]
|
|
|
|
mean value: 0.032667684555053714
|
|
|
|
key: score_time
|
|
value: [0.0119803 0.01158261 0.0117631 0.01166391 0.01161766 0.01175451
|
|
0.01187229 0.01174164 0.01191401 0.01204157]
|
|
|
|
mean value: 0.011793160438537597
|
|
|
|
key: test_mcc
|
|
value: [0.10206207 1. 0.16666667 0.61237244 0.76376262 0.75592895
|
|
0.75592895 0. 0.79056942 0.18898224]
|
|
|
|
mean value: 0.5136273334388017
|
|
|
|
key: train_mcc
|
|
value: [0.87400737 0.84541049 0.81997783 0.92244815 0.84800211 0.84888261
|
|
0.84888261 0.92547676 0.84888261 0.87379429]
|
|
|
|
mean value: 0.8655764811972638
|
|
|
|
key: test_accuracy
|
|
value: [0.6 1. 0.6 0.8 0.9 0.88888889
|
|
0.88888889 0.55555556 0.88888889 0.66666667]
|
|
|
|
mean value: 0.7788888888888889
|
|
|
|
key: train_accuracy
|
|
value: [0.94117647 0.92941176 0.91764706 0.96470588 0.92941176 0.93023256
|
|
0.93023256 0.96511628 0.93023256 0.94186047]
|
|
|
|
mean value: 0.9380027359781122
|
|
|
|
key: test_fscore
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.33333333 1. 0.5 0.66666667 0.8 0.8
|
|
0.8 0.33333333 0.85714286 0.4 ]
|
|
|
|
mean value: 0.6490476190476191
|
|
|
|
key: train_fscore
|
|
value: [0.90909091 0.89285714 0.87272727 0.94915254 0.89655172 0.89655172
|
|
0.89655172 0.94915254 0.89655172 0.91525424]
|
|
|
|
mean value: 0.9074441543260947
|
|
|
|
key: test_precision
|
|
value: [0.5 1. 0.5 1. 1. 1.
|
|
1. 0.33333333 0.75 0.5 ]
|
|
|
|
mean value: 0.7583333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 0.96153846 0.96 0.96551724 0.96296296 0.96296296
|
|
0.96296296 1. 0.96296296 0.96428571]
|
|
|
|
mean value: 0.9703193269055338
|
|
|
|
key: test_recall
|
|
value: [0.25 1. 0.5 0.5 0.66666667 0.66666667
|
|
0.66666667 0.33333333 1. 0.33333333]
|
|
|
|
mean value: 0.5916666666666667
|
|
|
|
key: train_recall
|
|
value: [0.83333333 0.83333333 0.8 0.93333333 0.83870968 0.83870968
|
|
0.83870968 0.90322581 0.83870968 0.87096774]
|
|
|
|
mean value: 0.8529032258064516
|
|
|
|
key: test_roc_auc
|
|
value: [0.54166667 1. 0.58333333 0.75 0.83333333 0.83333333
|
|
0.83333333 0.5 0.91666667 0.58333333]
|
|
|
|
mean value: 0.7375
|
|
|
|
key: train_roc_auc
|
|
value: [0.91666667 0.90757576 0.89090909 0.95757576 0.91009558 0.91026393
|
|
0.91026393 0.9516129 0.91026393 0.92639296]
|
|
|
|
mean value: 0.9191620506136635
|
|
|
|
key: test_jcc
|
|
value: [0.2 1. 0.33333333 0.5 0.66666667 0.66666667
|
|
0.66666667 0.2 0.75 0.25 ]
|
|
|
|
mean value: 0.5233333333333333
|
|
|
|
key: train_jcc
|
|
value: [0.83333333 0.80645161 0.77419355 0.90322581 0.8125 0.8125
|
|
0.8125 0.90322581 0.8125 0.84375 ]
|
|
|
|
mean value: 0.8314180107526882
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.6305654 0.64203525 0.80122828 0.83246946 0.99172902 0.6338408
|
|
0.65456843 0.54251766 0.75566173 0.57543039]
|
|
|
|
mean value: 0.7060046434402466
|
|
|
|
key: score_time
|
|
value: [0.01303911 0.01509404 0.0218246 0.0119071 0.01349711 0.01350045
|
|
0.01355267 0.0152142 0.01325059 0.01344347]
|
|
|
|
mean value: 0.014432334899902343
|
|
|
|
key: test_mcc
|
|
value: [0.10206207 1. 0.58333333 0.61237244 1. 0.31622777
|
|
0.79056942 0. 1. 0.5 ]
|
|
|
|
mean value: 0.5904565022704027
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 0.92244815 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9922448146872876
|
|
|
|
key: test_accuracy
|
|
value: [0.6 1. 0.8 0.8 1. 0.66666667
|
|
0.88888889 0.55555556 1. 0.77777778]
|
|
|
|
mean value: 0.8088888888888889
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 0.96470588 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9964705882352941
|
|
|
|
key: test_fscore
|
|
value: [0.33333333 1. 0.75 0.66666667 1. 0.57142857
|
|
0.85714286 0.33333333 1. 0.66666667]
|
|
|
|
mean value: 0.7178571428571429
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 0.94915254 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9949152542372881
|
|
|
|
key: test_precision
|
|
value: [0.5 1. 0.75 1. 1. 0.5
|
|
0.75 0.33333333 1. 0.66666667]
|
|
|
|
mean value: 0.75
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 0.96551724 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.996551724137931
|
|
|
|
key: test_recall
|
|
value: [0.25 1. 0.75 0.5 1. 0.66666667
|
|
1. 0.33333333 1. 0.66666667]
|
|
|
|
mean value: 0.7166666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 0.93333333 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9933333333333333
|
|
|
|
key: test_roc_auc
|
|
value: [0.54166667 1. 0.79166667 0.75 1. 0.66666667
|
|
0.91666667 0.5 1. 0.75 ]
|
|
|
|
mean value: 0.7916666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 0.95757576 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9957575757575757
|
|
|
|
key: test_jcc
|
|
value: [0.2 1. 0.6 0.5 1. 0.4 0.75 0.2 1. 0.5 ]
|
|
|
|
mean value: 0.615
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 0.90322581 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9903225806451613
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01273799 0.00977111 0.00878263 0.00947881 0.00864267 0.00817752
|
|
0.00824571 0.00992274 0.01026082 0.0092926 ]
|
|
|
|
mean value: 0.009531259536743164
|
|
|
|
key: score_time
|
|
value: [0.0123117 0.00888801 0.00865459 0.00866389 0.00831413 0.0083015
|
|
0.00889564 0.00979996 0.00948215 0.00917625]
|
|
|
|
mean value: 0.009248781204223632
|
|
|
|
key: test_mcc
|
|
value: [-0.40824829 0.53452248 0. -0.66666667 -0.04761905 0.25
|
|
0. -0.15811388 0.37796447 -0.31622777]
|
|
|
|
mean value: -0.043438869694075816
|
|
|
|
key: train_mcc
|
|
value: [0.4572923 0.37569908 0.50566869 0.37569908 0.35945901 0.44300062
|
|
0.38182065 0.54408523 0.43304776 0.39890654]
|
|
|
|
mean value: 0.4274678964568305
|
|
|
|
key: test_accuracy
|
|
value: [0.3 0.7 0.5 0.2 0.4 0.44444444
|
|
0.44444444 0.44444444 0.55555556 0.33333333]
|
|
|
|
mean value: 0.43222222222222223
|
|
|
|
key: train_accuracy
|
|
value: [0.67058824 0.62352941 0.75294118 0.62352941 0.61176471 0.6744186
|
|
0.62790698 0.76744186 0.65116279 0.60465116]
|
|
|
|
mean value: 0.6607934336525307
|
|
|
|
key: test_fscore
|
|
value: [0.46153846 0.72727273 0.44444444 0. 0.4 0.54545455
|
|
0.44444444 0.28571429 0.6 0.25 ]
|
|
|
|
mean value: 0.4158868908868909
|
|
|
|
key: train_fscore
|
|
value: [0.66666667 0.62790698 0.69565217 0.62790698 0.62921348 0.66666667
|
|
0.63636364 0.72222222 0.65909091 0.63829787]
|
|
|
|
mean value: 0.6569987583898009
|
|
|
|
key: test_precision
|
|
value: [0.33333333 0.57142857 0.4 0. 0.28571429 0.375
|
|
0.33333333 0.25 0.42857143 0.2 ]
|
|
|
|
mean value: 0.3177380952380952
|
|
|
|
key: train_precision
|
|
value: [0.51851852 0.48214286 0.61538462 0.48214286 0.48275862 0.52830189
|
|
0.49122807 0.63414634 0.50877193 0.47619048]
|
|
|
|
mean value: 0.5219586173324847
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 0.5 0. 0.66666667 1.
|
|
0.66666667 0.33333333 1. 0.33333333]
|
|
|
|
mean value: 0.625
|
|
|
|
key: train_recall
|
|
value: [0.93333333 0.9 0.8 0.9 0.90322581 0.90322581
|
|
0.90322581 0.83870968 0.93548387 0.96774194]
|
|
|
|
mean value: 0.8984946236559139
|
|
|
|
key: test_roc_auc
|
|
value: [0.375 0.75 0.5 0.16666667 0.47619048 0.58333333
|
|
0.5 0.41666667 0.66666667 0.33333333]
|
|
|
|
mean value: 0.47678571428571426
|
|
|
|
key: train_roc_auc
|
|
value: [0.73030303 0.68636364 0.76363636 0.68636364 0.67383513 0.72434018
|
|
0.68797654 0.7829912 0.71319648 0.68387097]
|
|
|
|
mean value: 0.713287715868361
|
|
|
|
key: test_jcc
|
|
value: [0.3 0.57142857 0.28571429 0. 0.25 0.375
|
|
0.28571429 0.16666667 0.42857143 0.14285714]
|
|
|
|
mean value: 0.28059523809523806
|
|
|
|
key: train_jcc
|
|
value: [0.5 0.45762712 0.53333333 0.45762712 0.45901639 0.5
|
|
0.46666667 0.56521739 0.49152542 0.46875 ]
|
|
|
|
mean value: 0.489976344576392
|
|
|
|
MCC on Blind test: 0.43
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00989056 0.00908041 0.0099144 0.0095005 0.00922489 0.00933409
|
|
0.00946307 0.00883198 0.00947952 0.00972533]
|
|
|
|
mean value: 0.009444475173950195
|
|
|
|
key: score_time
|
|
value: [0.0099194 0.0086503 0.00956011 0.00902653 0.00926256 0.00929809
|
|
0.00923109 0.0085485 0.00949311 0.00931835]
|
|
|
|
mean value: 0.009230804443359376
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0.35634832 0.10206207 -0.08908708 0.50917508 0.5
|
|
0.75592895 0. 0. 0. ]
|
|
|
|
mean value: 0.213442733776416
|
|
|
|
key: train_mcc
|
|
value: [0.60436722 0.49011543 0.60829929 0.61545745 0.55924814 0.61858419
|
|
0.55892608 0.58798186 0.53123917 0.6699368 ]
|
|
|
|
mean value: 0.584415564131712
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.7 0.6 0.5 0.8 0.77777778
|
|
0.88888889 0.55555556 0.66666667 0.55555556]
|
|
|
|
mean value: 0.6644444444444444
|
|
|
|
key: train_accuracy
|
|
value: [0.82352941 0.77647059 0.82352941 0.82352941 0.8 0.8255814
|
|
0.80232558 0.81395349 0.79069767 0.84883721]
|
|
|
|
mean value: 0.8128454172366621
|
|
|
|
key: test_fscore
|
|
value: [0. 0.57142857 0.33333333 0.28571429 0.5 0.5
|
|
0.8 0.33333333 0. 0.33333333]
|
|
|
|
mean value: 0.3657142857142857
|
|
|
|
key: train_fscore
|
|
value: [0.70588235 0.6122449 0.69387755 0.68085106 0.66666667 0.70588235
|
|
0.67924528 0.69230769 0.66666667 0.75471698]
|
|
|
|
mean value: 0.6858341508483701
|
|
|
|
key: test_precision
|
|
value: [0. 0.66666667 0.5 0.33333333 1. 1.
|
|
1. 0.33333333 0. 0.33333333]
|
|
|
|
mean value: 0.5166666666666666
|
|
|
|
key: train_precision
|
|
value: [0.85714286 0.78947368 0.89473684 0.94117647 0.85 0.9
|
|
0.81818182 0.85714286 0.7826087 0.90909091]
|
|
|
|
mean value: 0.859955413411464
|
|
|
|
key: test_recall
|
|
value: [0. 0.5 0.25 0.25 0.33333333 0.33333333
|
|
0.66666667 0.33333333 0. 0.33333333]
|
|
|
|
mean value: 0.3
|
|
|
|
key: train_recall
|
|
value: [0.6 0.5 0.56666667 0.53333333 0.5483871 0.58064516
|
|
0.58064516 0.58064516 0.58064516 0.64516129]
|
|
|
|
mean value: 0.5716129032258065
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.66666667 0.54166667 0.45833333 0.66666667 0.66666667
|
|
0.83333333 0.5 0.5 0.5 ]
|
|
|
|
mean value: 0.5833333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.77272727 0.71363636 0.76515152 0.75757576 0.74641577 0.77214076
|
|
0.75395894 0.76304985 0.74486804 0.80439883]
|
|
|
|
mean value: 0.7593923101987617
|
|
|
|
key: test_jcc
|
|
value: [0. 0.4 0.2 0.16666667 0.33333333 0.33333333
|
|
0.66666667 0.2 0. 0.2 ]
|
|
|
|
mean value: 0.25
|
|
|
|
key: train_jcc
|
|
value: [0.54545455 0.44117647 0.53125 0.51612903 0.5 0.54545455
|
|
0.51428571 0.52941176 0.5 0.60606061]
|
|
|
|
mean value: 0.5229222678807594
|
|
|
|
MCC on Blind test: -0.05
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00840855 0.01174235 0.00868917 0.00922012 0.00913095 0.00926518
|
|
0.00947428 0.00957751 0.00937104 0.00982618]
|
|
|
|
mean value: 0.009470534324645997
|
|
|
|
key: score_time
|
|
value: [0.04801059 0.03372741 0.00971007 0.01017523 0.01003671 0.01013446
|
|
0.0100739 0.01024842 0.01035857 0.01027942]
|
|
|
|
mean value: 0.016275477409362794
|
|
|
|
key: test_mcc
|
|
value: [ 0.10206207 0.40824829 0.61237244 -0.40824829 0.21821789 0.18898224
|
|
0.5 -0.37796447 0. 0.5 ]
|
|
|
|
mean value: 0.1743670162043139
|
|
|
|
key: train_mcc
|
|
value: [0.49020897 0.54920736 0.55391171 0.6030315 0.44511308 0.50769846
|
|
0.36709581 0.53123917 0.41886333 0.450776 ]
|
|
|
|
mean value: 0.491714539046235
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.7 0.8 0.4 0.7 0.66666667
|
|
0.77777778 0.44444444 0.66666667 0.77777778]
|
|
|
|
mean value: 0.6533333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.77647059 0.8 0.8 0.82352941 0.75294118 0.77906977
|
|
0.72093023 0.79069767 0.74418605 0.75581395]
|
|
|
|
mean value: 0.7743638850889193
|
|
|
|
key: test_fscore
|
|
value: [0.33333333 0.66666667 0.66666667 0. 0.4 0.4
|
|
0.5 0. 0. 0.5 ]
|
|
|
|
mean value: 0.3466666666666667
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[0.62745098 0.65306122 0.63829787 0.71698113 0.60377358 0.66666667
|
|
0.55555556 0.66666667 0.57692308 0.61818182]
|
|
|
|
mean value: 0.6323558578197295
|
|
|
|
key: test_precision
|
|
value: [0.5 0.6 1. 0. 0.5 0.5 1. 0. 0. 1. ]
|
|
|
|
mean value: 0.51
|
|
|
|
key: train_precision
|
|
value: [0.76190476 0.84210526 0.88235294 0.82608696 0.72727273 0.73076923
|
|
0.65217391 0.7826087 0.71428571 0.70833333]
|
|
|
|
mean value: 0.7627893537117524
|
|
|
|
key: test_recall
|
|
value: [0.25 0.75 0.5 0. 0.33333333 0.33333333
|
|
0.33333333 0. 0. 0.33333333]
|
|
|
|
mean value: 0.2833333333333333
|
|
|
|
key: train_recall
|
|
value: [0.53333333 0.53333333 0.5 0.63333333 0.51612903 0.61290323
|
|
0.48387097 0.58064516 0.48387097 0.5483871 ]
|
|
|
|
mean value: 0.5425806451612903
|
|
|
|
key: test_roc_auc
|
|
value: [0.54166667 0.70833333 0.75 0.33333333 0.5952381 0.58333333
|
|
0.66666667 0.33333333 0.5 0.66666667]
|
|
|
|
mean value: 0.5678571428571428
|
|
|
|
key: train_roc_auc
|
|
value: [0.72121212 0.73939394 0.73181818 0.78030303 0.70250896 0.74281525
|
|
0.66920821 0.74486804 0.68739003 0.71055718]
|
|
|
|
mean value: 0.7230074942978169
|
|
|
|
key: test_jcc
|
|
value: [0.2 0.5 0.5 0. 0.25 0.25
|
|
0.33333333 0. 0. 0.33333333]
|
|
|
|
mean value: 0.23666666666666666
|
|
|
|
key: train_jcc
|
|
value: [0.45714286 0.48484848 0.46875 0.55882353 0.43243243 0.5
|
|
0.38461538 0.5 0.40540541 0.44736842]
|
|
|
|
mean value: 0.4639386514908961
|
|
|
|
MCC on Blind test: 0.03
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01076531 0.01041484 0.01072669 0.0102756 0.01036048 0.01032281
|
|
0.01047754 0.00965858 0.00939202 0.0106647 ]
|
|
|
|
mean value: 0.01030585765838623
|
|
|
|
key: score_time
|
|
value: [0.00990939 0.00981712 0.0099659 0.01044989 0.00972366 0.00973272
|
|
0.009835 0.00915337 0.00969172 0.01013112]
|
|
|
|
mean value: 0.009840989112854004
|
|
|
|
key: test_mcc
|
|
value: [ 0.40824829 0. 0. -0.27216553 0.50917508 0.5
|
|
0.5 0.5 0. 0.18898224]
|
|
|
|
mean value: 0.23342400772098834
|
|
|
|
key: train_mcc
|
|
value: [0.6770032 0.62678317 0.65201286 0.6770032 0.63555257 0.66115063
|
|
0.63681234 0.73323558 0.66115063 0.70931623]
|
|
|
|
mean value: 0.6670020397347396
|
|
|
|
key: test_accuracy
|
|
value: [0.7 0.6 0.6 0.5 0.8 0.77777778
|
|
0.77777778 0.77777778 0.66666667 0.66666667]
|
|
|
|
mean value: 0.6866666666666666
|
|
|
|
key: train_accuracy
|
|
value: [0.84705882 0.82352941 0.83529412 0.84705882 0.82352941 0.8372093
|
|
0.8255814 0.87209302 0.8372093 0.86046512]
|
|
|
|
mean value: 0.8409028727770178
|
|
|
|
key: test_fscore
|
|
value: [0.4 0. 0. 0. 0.5 0.5 0.5 0.5 0. 0.4]
|
|
|
|
mean value: 0.28
|
|
|
|
key: train_fscore
|
|
value: [0.72340426 0.66666667 0.69565217 0.72340426 0.68085106 0.70833333
|
|
0.68085106 0.78431373 0.70833333 0.76 ]
|
|
|
|
mean value: 0.7131809871034445
|
|
|
|
key: test_precision
|
|
value: [1. 0. 0. 0. 1. 1. 1. 1. 0. 0.5]
|
|
|
|
mean value: 0.55
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.25 0. 0. 0. 0.33333333 0.33333333
|
|
0.33333333 0.33333333 0. 0.33333333]
|
|
|
|
mean value: 0.19166666666666665
|
|
|
|
key: train_recall
|
|
value: [0.56666667 0.5 0.53333333 0.56666667 0.51612903 0.5483871
|
|
0.51612903 0.64516129 0.5483871 0.61290323]
|
|
|
|
mean value: 0.5553763440860215
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.5 0.5 0.41666667 0.66666667 0.66666667
|
|
0.66666667 0.66666667 0.5 0.58333333]
|
|
|
|
mean value: 0.5791666666666666
|
|
|
|
key: train_roc_auc
|
|
value: [0.78333333 0.75 0.76666667 0.78333333 0.75806452 0.77419355
|
|
0.75806452 0.82258065 0.77419355 0.80645161]
|
|
|
|
mean value: 0.7776881720430108
|
|
|
|
key: test_jcc
|
|
value: [0.25 0. 0. 0. 0.33333333 0.33333333
|
|
0.33333333 0.33333333 0. 0.25 ]
|
|
|
|
mean value: 0.18333333333333332
|
|
|
|
key: train_jcc
|
|
value: [0.56666667 0.5 0.53333333 0.56666667 0.51612903 0.5483871
|
|
0.51612903 0.64516129 0.5483871 0.61290323]
|
|
|
|
mean value: 0.5553763440860215
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.43049026 0.49415779 0.62812185 0.50049639 0.47115254 0.45890641
|
|
0.56968665 0.37519431 0.49575472 0.44190574]
|
|
|
|
mean value: 0.4865866661071777
|
|
|
|
key: score_time
|
|
value: [0.01223922 0.0122118 0.01218987 0.01226497 0.01246333 0.01227331
|
|
0.01221108 0.01227903 0.01216149 0.01224041]
|
|
|
|
mean value: 0.012253451347351074
|
|
|
|
key: test_mcc
|
|
value: [-0.27216553 0.61237244 0.16666667 0.35634832 0.35634832 0.31622777
|
|
1. 0. 0. 0.18898224]
|
|
|
|
mean value: 0.2724780223007802
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.8 0.6 0.7 0.7 0.66666667
|
|
1. 0.55555556 0.55555556 0.66666667]
|
|
|
|
mean value: 0.6744444444444444
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0. 0.66666667 0.5 0.57142857 0.57142857 0.57142857
|
|
1. 0.33333333 0.33333333 0.4 ]
|
|
|
|
mean value: 0.4947619047619048
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0. 1. 0.5 0.66666667 0.5 0.5
|
|
1. 0.33333333 0.33333333 0.5 ]
|
|
|
|
mean value: 0.5333333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0. 0.5 0.5 0.5 0.66666667 0.66666667
|
|
1. 0.33333333 0.33333333 0.33333333]
|
|
|
|
mean value: 0.48333333333333334
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.41666667 0.75 0.58333333 0.66666667 0.69047619 0.66666667
|
|
1. 0.5 0.5 0.58333333]
|
|
|
|
mean value: 0.6357142857142857
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0. 0.5 0.33333333 0.4 0.4 0.4
|
|
1. 0.2 0.2 0.25 ]
|
|
|
|
mean value: 0.36833333333333335
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01463723 0.01358485 0.0116775 0.01080513 0.01131773 0.01103783
|
|
0.0112195 0.01001573 0.01102853 0.01110148]
|
|
|
|
mean value: 0.011642551422119141
|
|
|
|
key: score_time
|
|
value: [0.01222396 0.00996161 0.00970626 0.00927424 0.00924468 0.00926971
|
|
0.00877523 0.00926399 0.00918818 0.0093019 ]
|
|
|
|
mean value: 0.009620976448059083
|
|
|
|
key: test_mcc
|
|
value: [0.80178373 0.81649658 0.81649658 0.58333333 0.52380952 0.5
|
|
0.79056942 0.31622777 1. 0.31622777]
|
|
|
|
mean value: 0.6464944691811353
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.9 0.9 0.8 0.8 0.77777778
|
|
0.88888889 0.66666667 1. 0.66666667]
|
|
|
|
mean value: 0.83
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.88888889 0.88888889 0.75 0.66666667 0.5
|
|
0.85714286 0.57142857 1. 0.57142857]
|
|
|
|
mean value: 0.7551587301587301
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.8 0.8 0.75 0.66666667 1.
|
|
0.75 0.5 1. 0.5 ]
|
|
|
|
mean value: 0.7766666666666666
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 1. 0.75 0.66666667 0.33333333
|
|
1. 0.66666667 1. 0.66666667]
|
|
|
|
mean value: 0.7833333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.91666667 0.91666667 0.79166667 0.76190476 0.66666667
|
|
0.91666667 0.66666667 1. 0.66666667]
|
|
|
|
mean value: 0.8178571428571428
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.8 0.8 0.6 0.5 0.33333333
|
|
0.75 0.4 1. 0.4 ]
|
|
|
|
mean value: 0.6333333333333333
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.74
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09195757 0.09825468 0.08989787 0.08141875 0.08256102 0.08198786
|
|
0.08228302 0.08201671 0.08476615 0.08252168]
|
|
|
|
mean value: 0.08576653003692628
|
|
|
|
key: score_time
|
|
value: [0.01890445 0.01950073 0.01849556 0.01760888 0.01668882 0.01768684
|
|
0.01672912 0.01881576 0.01771498 0.01703644]
|
|
|
|
mean value: 0.017918157577514648
|
|
|
|
key: test_mcc
|
|
value: [0.40824829 0.35634832 0.35634832 0.35634832 1. 0.18898224
|
|
1. 0. 0.5 0.18898224]
|
|
|
|
mean value: 0.4355257731122788
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.7 0.7 0.7 0.7 1. 0.66666667
|
|
1. 0.55555556 0.77777778 0.66666667]
|
|
|
|
mean value: 0.7466666666666666
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.57142857 0.57142857 0.57142857 1. 0.4
|
|
1. 0.33333333 0.5 0.4 ]
|
|
|
|
mean value: 0.5747619047619048
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.66666667 0.66666667 0.66666667 1. 0.5
|
|
1. 0.33333333 1. 0.5 ]
|
|
|
|
mean value: 0.7333333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.25 0.5 0.5 0.5 1. 0.33333333
|
|
1. 0.33333333 0.33333333 0.33333333]
|
|
|
|
mean value: 0.5083333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.66666667 0.66666667 0.66666667 1. 0.58333333
|
|
1. 0.5 0.66666667 0.58333333]
|
|
|
|
mean value: 0.6958333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.4 0.4 0.4 1. 0.25
|
|
1. 0.2 0.33333333 0.25 ]
|
|
|
|
mean value: 0.44833333333333336
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00999427 0.01002336 0.009238 0.0086205 0.00972104 0.00898409
|
|
0.00901747 0.00868273 0.00898337 0.00904679]
|
|
|
|
mean value: 0.009231162071228028
|
|
|
|
key: score_time
|
|
value: [0.00914049 0.00980401 0.00973272 0.01032186 0.01000381 0.00882196
|
|
0.00972986 0.00877976 0.00856209 0.00964975]
|
|
|
|
mean value: 0.009454631805419922
|
|
|
|
key: test_mcc
|
|
value: [ 0.40824829 0.10206207 0.58333333 -0.40824829 0.35634832 -0.15811388
|
|
0.75592895 0. -0.15811388 0.15811388]
|
|
|
|
mean value: 0.16395587915092338
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.7 0.6 0.8 0.3 0.7 0.44444444
|
|
0.88888889 0.55555556 0.44444444 0.55555556]
|
|
|
|
mean value: 0.5988888888888889
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.33333333 0.75 0.22222222 0.57142857 0.28571429
|
|
0.8 0.33333333 0.28571429 0.5 ]
|
|
|
|
mean value: 0.44817460317460317
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.5 0.75 0.2 0.5 0.25
|
|
1. 0.33333333 0.25 0.4 ]
|
|
|
|
mean value: 0.5183333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.25 0.25 0.75 0.25 0.66666667 0.33333333
|
|
0.66666667 0.33333333 0.33333333 0.66666667]
|
|
|
|
mean value: 0.44999999999999996
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.54166667 0.79166667 0.29166667 0.69047619 0.41666667
|
|
0.83333333 0.5 0.41666667 0.58333333]
|
|
|
|
mean value: 0.569047619047619
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.2 0.6 0.125 0.4 0.16666667
|
|
0.66666667 0.2 0.16666667 0.33333333]
|
|
|
|
mean value: 0.31083333333333335
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.03
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.05605578 1.06167626 1.04786205 1.09075642 1.04063153 1.04710412
|
|
1.04251361 1.04319143 1.05921626 1.12373352]
|
|
|
|
mean value: 1.0612740993499756
|
|
|
|
key: score_time
|
|
value: [0.09127831 0.09289479 0.09460998 0.08635402 0.0898025 0.0876472
|
|
0.09085178 0.09048963 0.10437942 0.09341502]
|
|
|
|
mean value: 0.09217226505279541
|
|
|
|
key: test_mcc
|
|
value: [0.61237244 0.40824829 0.35634832 0.61237244 1. 0.18898224
|
|
1. 0. 0.5 0.5 ]
|
|
|
|
mean value: 0.5178323720909965
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.7 0.7 0.8 1. 0.66666667
|
|
1. 0.55555556 0.77777778 0.77777778]
|
|
|
|
mean value: 0.7777777777777778
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.4 0.57142857 0.66666667 1. 0.4
|
|
1. 0.33333333 0.5 0.66666667]
|
|
|
|
mean value: 0.6204761904761905
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.66666667 1. 1. 0.5
|
|
1. 0.33333333 1. 0.66666667]
|
|
|
|
mean value: 0.8166666666666667
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.5 0.25 0.5 0.5 1. 0.33333333
|
|
1. 0.33333333 0.33333333 0.66666667]
|
|
|
|
mean value: 0.5416666666666666
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.625 0.66666667 0.75 1. 0.58333333
|
|
1. 0.5 0.66666667 0.75 ]
|
|
|
|
mean value: 0.7291666666666666
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.25 0.4 0.5 1. 0.25
|
|
1. 0.2 0.33333333 0.5 ]
|
|
|
|
mean value: 0.49333333333333335
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
|
|
key: fit_time
|
|
value: [1.71200681 0.84919643 0.89473581 0.93872952 0.9066782 0.88126254
|
|
0.86979365 0.95759726 0.9450984 0.90982866]
|
|
|
|
mean value: 0.9864927291870117
|
|
|
|
key: score_time
|
|
value: [0.15694118 0.19528008 0.20258021 0.21864033 0.20269251 0.19596314
|
|
0.20151901 0.17505932 0.12959981 0.15955257]
|
|
|
|
mean value: 0.18378281593322754
|
|
|
|
key: test_mcc
|
|
value: [0.40824829 0.40824829 0.61237244 0.61237244 0.76376262 0.5
|
|
0.75592895 0. 0.5 0.5 ]
|
|
|
|
mean value: 0.5060933014163743
|
|
|
|
key: train_mcc
|
|
value: [0.87400737 0.92382264 0.84930058 0.89883792 0.90050042 0.90106553
|
|
0.87682222 0.92547676 0.90106553 0.92547676]
|
|
|
|
mean value: 0.8976375719952954
|
|
|
|
key: test_accuracy
|
|
value: [0.7 0.7 0.8 0.8 0.9 0.77777778
|
|
0.88888889 0.66666667 0.77777778 0.77777778]
|
|
|
|
mean value: 0.7788888888888889
|
|
|
|
key: train_accuracy
|
|
value: [0.94117647 0.96470588 0.92941176 0.95294118 0.95294118 0.95348837
|
|
0.94186047 0.96511628 0.95348837 0.96511628]
|
|
|
|
mean value: 0.9520246238030096
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.4 0.66666667 0.66666667 0.8 0.5
|
|
0.8 0. 0.5 0.66666667]
|
|
|
|
mean value: 0.54
|
|
|
|
key: train_fscore
|
|
value: [0.90909091 0.94736842 0.88888889 0.92857143 0.93103448 0.93103448
|
|
0.9122807 0.94915254 0.93103448 0.94915254]
|
|
|
|
mean value: 0.9277608882379869
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 0. 1. 0.66666667]
|
|
|
|
mean value: 0.8666666666666667
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.25 0.25 0.5 0.5 0.66666667 0.33333333
|
|
0.66666667 0. 0.33333333 0.66666667]
|
|
|
|
mean value: 0.41666666666666663
|
|
|
|
key: train_recall
|
|
value: [0.83333333 0.9 0.8 0.86666667 0.87096774 0.87096774
|
|
0.83870968 0.90322581 0.87096774 0.90322581]
|
|
|
|
mean value: 0.8658064516129033
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.625 0.75 0.75 0.83333333 0.66666667
|
|
0.83333333 0.5 0.66666667 0.75 ]
|
|
|
|
mean value: 0.7
|
|
|
|
key: train_roc_auc
|
|
value: [0.91666667 0.95 0.9 0.93333333 0.93548387 0.93548387
|
|
0.91935484 0.9516129 0.93548387 0.9516129 ]
|
|
|
|
mean value: 0.9329032258064517
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.25 0.5 0.5 0.66666667 0.33333333
|
|
0.66666667 0. 0.33333333 0.5 ]
|
|
|
|
mean value: 0.39999999999999997
|
|
|
|
key: train_jcc
|
|
value: [0.83333333 0.9 0.8 0.86666667 0.87096774 0.87096774
|
|
0.83870968 0.90322581 0.87096774 0.90322581]
|
|
|
|
mean value: 0.8658064516129033
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02308393 0.0088551 0.00888968 0.00859785 0.008533 0.00856447
|
|
0.0095396 0.01004243 0.01001215 0.00988889]
|
|
|
|
mean value: 0.010600709915161132
|
|
|
|
key: score_time
|
|
value: [0.01271439 0.00862908 0.0085578 0.00848007 0.00839162 0.00842977
|
|
0.0094707 0.00961733 0.009341 0.00940299]
|
|
|
|
mean value: 0.009303474426269531
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0.35634832 0.10206207 -0.08908708 0.50917508 0.5
|
|
0.75592895 0. 0. 0. ]
|
|
|
|
mean value: 0.213442733776416
|
|
|
|
key: train_mcc
|
|
value: [0.60436722 0.49011543 0.60829929 0.61545745 0.55924814 0.61858419
|
|
0.55892608 0.58798186 0.53123917 0.6699368 ]
|
|
|
|
mean value: 0.584415564131712
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.7 0.6 0.5 0.8 0.77777778
|
|
0.88888889 0.55555556 0.66666667 0.55555556]
|
|
|
|
mean value: 0.6644444444444444
|
|
|
|
key: train_accuracy
|
|
value: [0.82352941 0.77647059 0.82352941 0.82352941 0.8 0.8255814
|
|
0.80232558 0.81395349 0.79069767 0.84883721]
|
|
|
|
mean value: 0.8128454172366621
|
|
|
|
key: test_fscore
|
|
value: [0. 0.57142857 0.33333333 0.28571429 0.5 0.5
|
|
0.8 0.33333333 0. 0.33333333]
|
|
|
|
mean value: 0.3657142857142857
|
|
|
|
key: train_fscore
|
|
value: [0.70588235 0.6122449 0.69387755 0.68085106 0.66666667 0.70588235
|
|
0.67924528 0.69230769 0.66666667 0.75471698]
|
|
|
|
mean value: 0.6858341508483701
|
|
|
|
key: test_precision
|
|
value: [0. 0.66666667 0.5 0.33333333 1. 1.
|
|
1. 0.33333333 0. 0.33333333]
|
|
|
|
mean value: 0.5166666666666666
|
|
|
|
key: train_precision
|
|
value: [0.85714286 0.78947368 0.89473684 0.94117647 0.85 0.9
|
|
0.81818182 0.85714286 0.7826087 0.90909091]
|
|
|
|
mean value: 0.859955413411464
|
|
|
|
key: test_recall
|
|
value: [0. 0.5 0.25 0.25 0.33333333 0.33333333
|
|
0.66666667 0.33333333 0. 0.33333333]
|
|
|
|
mean value: 0.3
|
|
|
|
key: train_recall
|
|
value: [0.6 0.5 0.56666667 0.53333333 0.5483871 0.58064516
|
|
0.58064516 0.58064516 0.58064516 0.64516129]
|
|
|
|
mean value: 0.5716129032258065
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.66666667 0.54166667 0.45833333 0.66666667 0.66666667
|
|
0.83333333 0.5 0.5 0.5 ]
|
|
|
|
mean value: 0.5833333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.77272727 0.71363636 0.76515152 0.75757576 0.74641577 0.77214076
|
|
0.75395894 0.76304985 0.74486804 0.80439883]
|
|
|
|
mean value: 0.7593923101987617
|
|
|
|
key: test_jcc
|
|
value: [0. 0.4 0.2 0.16666667 0.33333333 0.33333333
|
|
0.66666667 0.2 0. 0.2 ]
|
|
|
|
mean value: 0.25
|
|
|
|
key: train_jcc
|
|
value: [0.54545455 0.44117647 0.53125 0.51612903 0.5 0.54545455
|
|
0.51428571 0.52941176 0.5 0.60606061]
|
|
|
|
mean value: 0.5229222678807594
|
|
|
|
MCC on Blind test: -0.05
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.07264853 0.04024744 0.05866051 0.03784394 0.19269013 0.04648733
|
|
0.19036412 0.05203319 0.24832368 0.04049802]
|
|
|
|
mean value: 0.09797968864440917
|
|
|
|
key: score_time
|
|
value: [0.0106523 0.01081157 0.01139903 0.02005935 0.01160455 0.01053858
|
|
0.01291728 0.01065969 0.01079082 0.01054072]
|
|
|
|
mean value: 0.011997389793395995
|
|
|
|
key: test_mcc
|
|
value: [0.80178373 1. 1. 0.80178373 0.80178373 1.
|
|
0.79056942 0.5 1. 0.5 ]
|
|
|
|
mean value: 0.8195920592253915
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 1. 1. 0.9 0.9 1.
|
|
0.88888889 0.77777778 1. 0.77777778]
|
|
|
|
mean value: 0.9144444444444444
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 1. 1. 0.85714286 0.85714286 1.
|
|
0.85714286 0.66666667 1. 0.66666667]
|
|
|
|
mean value: 0.8761904761904762
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 0.75 1.
|
|
0.75 0.66666667 1. 0.66666667]
|
|
|
|
mean value: 0.8833333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 1. 0.75 1. 1.
|
|
1. 0.66666667 1. 0.66666667]
|
|
|
|
mean value: 0.8833333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 1. 1. 0.875 0.92857143 1.
|
|
0.91666667 0.75 1. 0.75 ]
|
|
|
|
mean value: 0.9095238095238095
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 1. 1. 0.75 0.75 1. 0.75 0.5 1. 0.5 ]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 1.0
|
|
|
|
Accuracy on Blind test: 1.0
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03847551 0.01781559 0.03719854 0.0455792 0.06547785 0.01787257
|
|
0.01816368 0.05660486 0.03784299 0.0435307 ]
|
|
|
|
mean value: 0.03785614967346192
|
|
|
|
key: score_time
|
|
value: [0.01195216 0.01164412 0.02182126 0.02141857 0.01197553 0.01159596
|
|
0.01156759 0.02123332 0.02323461 0.02098775]
|
|
|
|
mean value: 0.01674308776855469
|
|
|
|
key: test_mcc
|
|
value: [-0.08908708 0.58333333 0.40824829 0.10206207 0.21821789 0.15811388
|
|
0.5 0.15811388 0. 0.18898224]
|
|
|
|
mean value: 0.22279845085331312
|
|
|
|
key: train_mcc
|
|
value: [1. 0.97437404 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9974374035935257
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.8 0.6 0.6 0.6 0.55555556
|
|
0.77777778 0.55555556 0.55555556 0.66666667]
|
|
|
|
mean value: 0.6211111111111112
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.98823529 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9988235294117647
|
|
|
|
key: test_fscore
|
|
value: [0.28571429 0.75 0.66666667 0.33333333 0.5 0.5
|
|
0.66666667 0.5 0.33333333 0.4 ]
|
|
|
|
mean value: 0.49357142857142855
|
|
|
|
key: train_fscore
|
|
value: [1. 0.98305085 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9983050847457627
|
|
|
|
key: test_precision
|
|
value: [0.33333333 0.75 0.5 0.5 0.4 0.4
|
|
0.66666667 0.4 0.33333333 0.5 ]
|
|
|
|
mean value: 0.47833333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.25 0.75 1. 0.25 0.66666667 0.66666667
|
|
0.66666667 0.66666667 0.33333333 0.33333333]
|
|
|
|
mean value: 0.5583333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 0.96666667 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9966666666666667
|
|
|
|
key: test_roc_auc
|
|
value: [0.45833333 0.79166667 0.66666667 0.54166667 0.61904762 0.58333333
|
|
0.75 0.58333333 0.5 0.58333333]
|
|
|
|
mean value: 0.6077380952380953
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.98333333 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9983333333333333
|
|
|
|
key: test_jcc
|
|
value: [0.16666667 0.6 0.5 0.2 0.33333333 0.33333333
|
|
0.5 0.33333333 0.2 0.25 ]
|
|
|
|
mean value: 0.3416666666666667
|
|
|
|
key: train_jcc
|
|
value: [1. 0.96666667 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9966666666666667
|
|
|
|
MCC on Blind test: 0.18
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01405334 0.01005578 0.0094142 0.00955582 0.0101738 0.0088737
|
|
0.00851607 0.00905228 0.00850415 0.00887322]
|
|
|
|
mean value: 0.009707236289978027
|
|
|
|
key: score_time
|
|
value: [0.01176906 0.00879908 0.00967574 0.00959611 0.00920868 0.00871491
|
|
0.00835419 0.00853872 0.00896287 0.00902462]
|
|
|
|
mean value: 0.009264397621154784
|
|
|
|
key: test_mcc
|
|
value: [ 0.58333333 0.81649658 -0.08908708 0.10206207 0.04761905 0.75592895
|
|
0.75592895 0. 1. 0.18898224]
|
|
|
|
mean value: 0.416126408240012
|
|
|
|
key: train_mcc
|
|
value: [0.49220409 0.52104078 0.46288643 0.49580055 0.58380666 0.55876134
|
|
0.58595363 0.47652472 0.50476867 0.61443144]
|
|
|
|
mean value: 0.5296178301813469
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.9 0.5 0.6 0.6 0.88888889
|
|
0.88888889 0.55555556 1. 0.66666667]
|
|
|
|
mean value: 0.74
|
|
|
|
key: train_accuracy
|
|
value: [0.77647059 0.78823529 0.76470588 0.77647059 0.81176471 0.80232558
|
|
0.81395349 0.76744186 0.77906977 0.8255814 ]
|
|
|
|
mean value: 0.7906019151846785
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.88888889 0.28571429 0.33333333 0.33333333 0.8
|
|
0.8 0.33333333 1. 0.4 ]
|
|
|
|
mean value: 0.5924603174603175
|
|
|
|
key: train_fscore
|
|
value: [0.64150943 0.625 0.61538462 0.65454545 0.7037037 0.69090909
|
|
0.7037037 0.62962963 0.65454545 0.71698113]
|
|
|
|
mean value: 0.6635912218459389
|
|
|
|
key: test_precision
|
|
value: [0.75 0.8 0.33333333 0.5 0.33333333 1.
|
|
1. 0.33333333 1. 0.5 ]
|
|
|
|
mean value: 0.655
|
|
|
|
key: train_precision
|
|
value: [0.73913043 0.83333333 0.72727273 0.72 0.82608696 0.79166667
|
|
0.82608696 0.73913043 0.75 0.86363636]
|
|
|
|
mean value: 0.7816343873517786
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 0.25 0.25 0.33333333 0.66666667
|
|
0.66666667 0.33333333 1. 0.33333333]
|
|
|
|
mean value: 0.5583333333333333
|
|
|
|
key: train_recall
|
|
value: [0.56666667 0.5 0.53333333 0.6 0.61290323 0.61290323
|
|
0.61290323 0.5483871 0.58064516 0.61290323]
|
|
|
|
mean value: 0.5780645161290323
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.91666667 0.45833333 0.54166667 0.52380952 0.83333333
|
|
0.83333333 0.5 1. 0.58333333]
|
|
|
|
mean value: 0.6982142857142857
|
|
|
|
key: train_roc_auc
|
|
value: [0.72878788 0.72272727 0.71212121 0.73636364 0.76941458 0.76099707
|
|
0.77008798 0.71964809 0.73577713 0.77917889]
|
|
|
|
mean value: 0.7435103725426305
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.8 0.16666667 0.2 0.2 0.66666667
|
|
0.66666667 0.2 1. 0.25 ]
|
|
|
|
mean value: 0.475
|
|
|
|
key: train_jcc
|
|
value: [0.47222222 0.45454545 0.44444444 0.48648649 0.54285714 0.52777778
|
|
0.54285714 0.45945946 0.48648649 0.55882353]
|
|
|
|
mean value: 0.49759601465483816
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02036524 0.01379609 0.01414776 0.01401329 0.01353359 0.01360321
|
|
0.01371813 0.01394486 0.01390672 0.01339316]
|
|
|
|
mean value: 0.014442205429077148
|
|
|
|
key: score_time
|
|
value: [0.00943685 0.01183081 0.01178408 0.01181483 0.01192355 0.01141214
|
|
0.01138711 0.01142097 0.01171732 0.01146817]
|
|
|
|
mean value: 0.011419582366943359
|
|
|
|
key: test_mcc
|
|
value: [0.61237244 1. 0.35634832 0.35634832 0.80178373 0.5
|
|
0.79056942 0.15811388 0.63245553 0.18898224]
|
|
|
|
mean value: 0.539697387312167
|
|
|
|
key: train_mcc
|
|
value: [0.97437404 0.81997783 0.84541049 0.94899146 0.88395622 0.94956012
|
|
0.87505605 0.7334961 0.65238253 0.87379429]
|
|
|
|
mean value: 0.8556999126234828
|
|
|
|
key: test_accuracy
|
|
value: [0.8 1. 0.7 0.7 0.9 0.77777778
|
|
0.88888889 0.55555556 0.77777778 0.66666667]
|
|
|
|
mean value: 0.7766666666666666
|
|
|
|
key: train_accuracy
|
|
value: [0.98823529 0.91764706 0.92941176 0.97647059 0.94117647 0.97674419
|
|
0.94186047 0.84883721 0.79069767 0.94186047]
|
|
|
|
mean value: 0.9252941176470588
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 1. 0.57142857 0.57142857 0.85714286 0.66666667
|
|
0.85714286 0.5 0.75 0.4 ]
|
|
|
|
mean value: 0.684047619047619
|
|
|
|
key: train_fscore
|
|
value: [0.98305085 0.87272727 0.89285714 0.96551724 0.92537313 0.96774194
|
|
0.92063492 0.82666667 0.775 0.91525424]
|
|
|
|
mean value: 0.9044823398823305
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.66666667 0.66666667 0.75 0.66666667
|
|
0.75 0.4 0.6 0.5 ]
|
|
|
|
mean value: 0.7
|
|
|
|
key: train_precision
|
|
value: [1. 0.96 0.96153846 1. 0.86111111 0.96774194
|
|
0.90625 0.70454545 0.63265306 0.96428571]
|
|
|
|
mean value: 0.8958125738189102
|
|
|
|
key: test_recall
|
|
value: [0.5 1. 0.5 0.5 1. 0.66666667
|
|
1. 0.66666667 1. 0.33333333]
|
|
|
|
mean value: 0.7166666666666667
|
|
|
|
key: train_recall
|
|
value: [0.96666667 0.8 0.83333333 0.93333333 1. 0.96774194
|
|
0.93548387 1. 1. 0.87096774]
|
|
|
|
mean value: 0.930752688172043
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 1. 0.66666667 0.66666667 0.92857143 0.75
|
|
0.91666667 0.58333333 0.83333333 0.58333333]
|
|
|
|
mean value: 0.7678571428571429
|
|
|
|
key: train_roc_auc
|
|
value: [0.98333333 0.89090909 0.90757576 0.96666667 0.9537037 0.97478006
|
|
0.94046921 0.88181818 0.83636364 0.92639296]
|
|
|
|
mean value: 0.9262012599109374
|
|
|
|
key: test_jcc
|
|
value: [0.5 1. 0.4 0.4 0.75 0.5
|
|
0.75 0.33333333 0.6 0.25 ]
|
|
|
|
mean value: 0.5483333333333333
|
|
|
|
key: train_jcc
|
|
value: [0.96666667 0.77419355 0.80645161 0.93333333 0.86111111 0.9375
|
|
0.85294118 0.70454545 0.63265306 0.84375 ]
|
|
|
|
mean value: 0.8313145964641966
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01296854 0.01292849 0.01249313 0.01251721 0.01263046 0.01278448
|
|
0.01254177 0.01312518 0.01244521 0.01282358]
|
|
|
|
mean value: 0.01272580623626709
|
|
|
|
key: score_time
|
|
value: [0.0105958 0.01149774 0.01142406 0.01138139 0.01136732 0.01166797
|
|
0.01163888 0.01169968 0.01176143 0.01148963]
|
|
|
|
mean value: 0.011452388763427735
|
|
|
|
key: test_mcc
|
|
value: [0.10206207 0.40824829 0.10206207 0.35634832 0.80178373 0.18898224
|
|
0.75592895 0. 1. 0.5 ]
|
|
|
|
mean value: 0.4215415666506035
|
|
|
|
key: train_mcc
|
|
value: [0.92361012 0.71852548 0.72648316 0.92382264 0.63212317 0.97491581
|
|
0.84888261 1. 0.84888261 0.88477584]
|
|
|
|
mean value: 0.848202141678347
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.7 0.6 0.7 0.9 0.66666667
|
|
0.88888889 0.55555556 1. 0.77777778]
|
|
|
|
mean value: 0.7388888888888889
|
|
|
|
key: train_accuracy
|
|
value: [0.96470588 0.87058824 0.87058824 0.96470588 0.78823529 0.98837209
|
|
0.93023256 1. 0.93023256 0.94186047]
|
|
|
|
mean value: 0.924952120383037
|
|
|
|
key: test_fscore
|
|
value: [0.33333333 0.4 0.33333333 0.57142857 0.85714286 0.4
|
|
0.8 0.33333333 1. 0.66666667]
|
|
|
|
mean value: 0.5695238095238095
|
|
|
|
key: train_fscore
|
|
value: [0.95081967 0.78431373 0.7755102 0.94736842 0.76923077 0.98360656
|
|
0.89655172 1. 0.89655172 0.92537313]
|
|
|
|
mean value: 0.8929325931967647
|
|
|
|
key: test_precision
|
|
value: [0.5 1. 0.5 0.66666667 0.75 0.5
|
|
1. 0.33333333 1. 0.66666667]
|
|
|
|
mean value: 0.6916666666666667
|
|
|
|
key: train_precision
|
|
value: [0.93548387 0.95238095 1. 1. 0.63829787 1.
|
|
0.96296296 1. 0.96296296 0.86111111]
|
|
|
|
mean value: 0.9313199732726156
|
|
|
|
key: test_recall
|
|
value: [0.25 0.25 0.25 0.5 1. 0.33333333
|
|
0.66666667 0.33333333 1. 0.66666667]
|
|
|
|
mean value: 0.525
|
|
|
|
key: train_recall
|
|
value: [0.96666667 0.66666667 0.63333333 0.9 0.96774194 0.96774194
|
|
0.83870968 1. 0.83870968 1. ]
|
|
|
|
mean value: 0.8779569892473118
|
|
|
|
key: test_roc_auc
|
|
value: [0.54166667 0.625 0.54166667 0.66666667 0.92857143 0.58333333
|
|
0.83333333 0.5 1. 0.75 ]
|
|
|
|
mean value: 0.6970238095238095
|
|
|
|
key: train_roc_auc
|
|
value: [0.96515152 0.82424242 0.81666667 0.95 0.82646356 0.98387097
|
|
0.91026393 1. 0.91026393 0.95454545]
|
|
|
|
mean value: 0.914146844792006
|
|
|
|
key: test_jcc
|
|
value: [0.2 0.25 0.2 0.4 0.75 0.25
|
|
0.66666667 0.2 1. 0.5 ]
|
|
|
|
mean value: 0.44166666666666665
|
|
|
|
key: train_jcc
|
|
value: [0.90625 0.64516129 0.63333333 0.9 0.625 0.96774194
|
|
0.8125 1. 0.8125 0.86111111]
|
|
|
|
mean value: 0.8163597670250896
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.11731172 0.09047723 0.08067465 0.08065319 0.08014202 0.08167005
|
|
0.09771609 0.08143854 0.08090734 0.0816648 ]
|
|
|
|
mean value: 0.08726556301116943
|
|
|
|
key: score_time
|
|
value: [0.01771545 0.01542926 0.01495719 0.01451015 0.01439619 0.01685309
|
|
0.01433587 0.01471639 0.01509571 0.01757908]
|
|
|
|
mean value: 0.015558838844299316
|
|
|
|
key: test_mcc
|
|
value: [0.80178373 0.81649658 1. 0.80178373 0.52380952 0.5
|
|
0.79056942 0.63245553 1. 0.5 ]
|
|
|
|
mean value: 0.7366898503287567
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.9 1. 0.9 0.8 0.77777778
|
|
0.88888889 0.77777778 1. 0.77777778]
|
|
|
|
mean value: 0.8722222222222222
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.88888889 1. 0.85714286 0.66666667 0.5
|
|
0.85714286 0.75 1. 0.66666667]
|
|
|
|
mean value: 0.8043650793650794
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.8 1. 1. 0.66666667 1.
|
|
0.75 0.6 1. 0.66666667]
|
|
|
|
mean value: 0.8483333333333334
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 1. 0.75 0.66666667 0.33333333
|
|
1. 1. 1. 0.66666667]
|
|
|
|
mean value: 0.8166666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.91666667 1. 0.875 0.76190476 0.66666667
|
|
0.91666667 0.83333333 1. 0.75 ]
|
|
|
|
mean value: 0.8595238095238096
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.8 1. 0.75 0.5 0.33333333
|
|
0.75 0.6 1. 0.5 ]
|
|
|
|
mean value: 0.6983333333333334
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.75
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03400898 0.03018355 0.03829837 0.03704643 0.02870989 0.04538727
|
|
0.03114915 0.0352478 0.04276204 0.05775356]
|
|
|
|
mean value: 0.038054704666137695
|
|
|
|
key: score_time
|
|
value: [0.01889777 0.02142334 0.02747583 0.01724172 0.02889967 0.02905703
|
|
0.02246833 0.04204655 0.02258444 0.02515888]
|
|
|
|
mean value: 0.025525355339050294
|
|
|
|
key: test_mcc
|
|
value: [0.80178373 1. 1. 0.80178373 0.76376262 1.
|
|
0.79056942 0.5 1. 0.5 ]
|
|
|
|
mean value: 0.8157899482342614
|
|
|
|
key: train_mcc
|
|
value: [0.97437404 0.97437404 0.97437404 1. 0.97475465 0.97491581
|
|
0.97491581 0.97491581 0.97491581 0.92547676]
|
|
|
|
mean value: 0.972301674153923
|
|
|
|
key: test_accuracy
|
|
value: [0.9 1. 1. 0.9 0.9 1.
|
|
0.88888889 0.77777778 1. 0.77777778]
|
|
|
|
mean value: 0.9144444444444444
|
|
|
|
key: train_accuracy
|
|
value: [0.98823529 0.98823529 0.98823529 1. 0.98823529 0.98837209
|
|
0.98837209 0.98837209 0.98837209 0.96511628]
|
|
|
|
mean value: 0.9871545827633379
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 1. 1. 0.85714286 0.8 1.
|
|
0.85714286 0.66666667 1. 0.66666667]
|
|
|
|
mean value: 0.8704761904761904
|
|
|
|
key: train_fscore
|
|
value: [0.98305085 0.98305085 0.98305085 1. 0.98360656 0.98360656
|
|
0.98360656 0.98360656 0.98360656 0.94915254]
|
|
|
|
mean value: 0.9816337871631008
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.75 0.66666667 1. 0.66666667]
|
|
|
|
mean value: 0.9083333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 1. 0.75 0.66666667 1.
|
|
1. 0.66666667 1. 0.66666667]
|
|
|
|
mean value: 0.85
|
|
|
|
key: train_recall
|
|
value: [0.96666667 0.96666667 0.96666667 1. 0.96774194 0.96774194
|
|
0.96774194 0.96774194 0.96774194 0.90322581]
|
|
|
|
mean value: 0.9641935483870968
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 1. 1. 0.875 0.83333333 1.
|
|
0.91666667 0.75 1. 0.75 ]
|
|
|
|
mean value: 0.9
|
|
|
|
key: train_roc_auc
|
|
value: [0.98333333 0.98333333 0.98333333 1. 0.98387097 0.98387097
|
|
0.98387097 0.98387097 0.98387097 0.9516129 ]
|
|
|
|
mean value: 0.9820967741935485
|
|
|
|
key: test_jcc
|
|
value: [0.75 1. 1. 0.75 0.66666667 1.
|
|
0.75 0.5 1. 0.5 ]
|
|
|
|
mean value: 0.7916666666666666
|
|
|
|
key: train_jcc
|
|
value: [0.96666667 0.96666667 0.96666667 1. 0.96774194 0.96774194
|
|
0.96774194 0.96774194 0.96774194 0.90322581]
|
|
|
|
mean value: 0.9641935483870968
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01461864 0.01571798 0.01647425 0.01659679 0.01654196 0.01667929
|
|
0.01652122 0.01654172 0.01666808 0.03191781]
|
|
|
|
mean value: 0.017827773094177247
|
|
|
|
key: score_time
|
|
value: [0.01155424 0.01138949 0.01188564 0.01206255 0.01215148 0.01216245
|
|
0.01211071 0.01241565 0.01263618 0.01906419]
|
|
|
|
mean value: 0.012743258476257324
|
|
|
|
key: test_mcc
|
|
value: [ 0.61237244 0.35634832 0.35634832 -0.08908708 0.50917508 0.5
|
|
0.75592895 0. 0. 0. ]
|
|
|
|
mean value: 0.3001086023393888
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.7 0.7 0.5 0.8 0.77777778
|
|
0.88888889 0.55555556 0.66666667 0.55555556]
|
|
|
|
mean value: 0.6944444444444444
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.57142857 0.57142857 0.28571429 0.5 0.5
|
|
0.8 0.33333333 0. 0.33333333]
|
|
|
|
mean value: 0.4561904761904762
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.66666667 0.66666667 0.33333333 1. 1.
|
|
1. 0.33333333 0. 0.33333333]
|
|
|
|
mean value: 0.6333333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.5 0.5 0.5 0.25 0.33333333 0.33333333
|
|
0.66666667 0.33333333 0. 0.33333333]
|
|
|
|
mean value: 0.375
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.66666667 0.66666667 0.45833333 0.66666667 0.66666667
|
|
0.83333333 0.5 0.5 0.5 ]
|
|
|
|
mean value: 0.6208333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.4 0.4 0.16666667 0.33333333 0.33333333
|
|
0.66666667 0.2 0. 0.2 ]
|
|
|
|
mean value: 0.32
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.18809891 0.18171883 0.17854691 0.14318514 0.19262028 0.18166924
|
|
0.20315766 0.15280724 0.19356513 0.19339061]
|
|
|
|
mean value: 0.18087599277496338
|
|
|
|
key: score_time
|
|
value: [0.00921273 0.00939512 0.009233 0.00900984 0.00917459 0.00901937
|
|
0.00965309 0.00958896 0.0104444 0.0093751 ]
|
|
|
|
mean value: 0.009410619735717773
|
|
|
|
key: test_mcc
|
|
value: [0.80178373 0.81649658 1. 0.80178373 0.80178373 0.75592895
|
|
0.79056942 0.5 1. 0.5 ]
|
|
|
|
mean value: 0.7768346119200095
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.9 1. 0.9 0.9 0.88888889
|
|
0.88888889 0.77777778 1. 0.77777778]
|
|
|
|
mean value: 0.8933333333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.88888889 1. 0.85714286 0.85714286 0.8
|
|
0.85714286 0.66666667 1. 0.66666667]
|
|
|
|
mean value: 0.8450793650793651
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.8 1. 1. 0.75 1.
|
|
0.75 0.66666667 1. 0.66666667]
|
|
|
|
mean value: 0.8633333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 1. 0.75 1. 0.66666667
|
|
1. 0.66666667 1. 0.66666667]
|
|
|
|
mean value: 0.85
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.91666667 1. 0.875 0.92857143 0.83333333
|
|
0.91666667 0.75 1. 0.75 ]
|
|
|
|
mean value: 0.8845238095238095
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.8 1. 0.75 0.75 0.66666667
|
|
0.75 0.5 1. 0.5 ]
|
|
|
|
mean value: 0.7466666666666667
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01267958 0.01411915 0.01453662 0.01414657 0.01687789 0.01843524
|
|
0.01511002 0.01613832 0.01594377 0.01599717]
|
|
|
|
mean value: 0.015398430824279784
|
|
|
|
key: score_time
|
|
value: [0.01205015 0.01194572 0.01195192 0.01181078 0.012923 0.01364398
|
|
0.01315475 0.01197457 0.01326942 0.01356363]
|
|
|
|
mean value: 0.012628793716430664
|
|
|
|
key: test_mcc
|
|
value: [-0.53452248 0.35634832 -0.27216553 0.10206207 0.21821789 0.
|
|
0.79056942 0. 0.79056942 0. ]
|
|
|
|
mean value: 0.14510791046852894
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.3 0.7 0.5 0.6 0.7 0.66666667
|
|
0.88888889 0.55555556 0.88888889 0.55555556]
|
|
|
|
mean value: 0.6355555555555555
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0. 0.57142857 0. 0.33333333 0.4 0.
|
|
0.85714286 0.33333333 0.85714286 0.33333333]
|
|
|
|
mean value: 0.36857142857142855
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0. 0.66666667 0. 0.5 0.5 0.
|
|
0.75 0.33333333 0.75 0.33333333]
|
|
|
|
mean value: 0.3833333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0. 0.5 0. 0.25 0.33333333 0.
|
|
1. 0.33333333 1. 0.33333333]
|
|
|
|
mean value: 0.375
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.25 0.66666667 0.41666667 0.54166667 0.5952381 0.5
|
|
0.91666667 0.5 0.91666667 0.5 ]
|
|
|
|
mean value: 0.5803571428571429
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0. 0.4 0. 0.2 0.25 0. 0.75 0.2 0.75 0.2 ]
|
|
|
|
mean value: 0.275
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.19
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03610539 0.04523587 0.04013562 0.05722523 0.0501008 0.05744076
|
|
0.04511309 0.04056859 0.04986453 0.052284 ]
|
|
|
|
mean value: 0.04740738868713379
|
|
|
|
key: score_time
|
|
value: [0.0153594 0.02304769 0.0128181 0.02825665 0.03019476 0.02031326
|
|
0.01596236 0.01964951 0.01914048 0.02306318]
|
|
|
|
mean value: 0.020780539512634276
|
|
|
|
key: test_mcc
|
|
value: [0.16666667 1. 0.40824829 0.61237244 1. 0.31622777
|
|
0.5 0. 1. 0.18898224]
|
|
|
|
mean value: 0.5192497395347776
|
|
|
|
key: train_mcc
|
|
value: [0.97437404 0.92244815 0.89661206 0.92244815 0.92361012 0.94956012
|
|
0.92410201 1. 0.92410201 0.97491581]
|
|
|
|
mean value: 0.941217245812007
|
|
|
|
key: test_accuracy
|
|
value: [0.6 1. 0.7 0.8 1. 0.66666667
|
|
0.77777778 0.55555556 1. 0.66666667]
|
|
|
|
mean value: 0.7766666666666666
|
|
|
|
key: train_accuracy
|
|
value: [0.98823529 0.96470588 0.95294118 0.96470588 0.96470588 0.97674419
|
|
0.96511628 1. 0.96511628 0.98837209]
|
|
|
|
mean value: 0.9730642954856361
|
|
|
|
key: test_fscore
|
|
value: [0.5 1. 0.66666667 0.66666667 1. 0.57142857
|
|
0.66666667 0.33333333 1. 0.4 ]
|
|
|
|
mean value: 0.6804761904761905
|
|
|
|
key: train_fscore
|
|
value: [0.98305085 0.94915254 0.93103448 0.94915254 0.95081967 0.96774194
|
|
0.95081967 1. 0.95081967 0.98360656]
|
|
|
|
mean value: 0.9616197924216374
|
|
|
|
key: test_precision
|
|
value: [0.5 1. 0.6 1. 1. 0.5
|
|
0.66666667 0.33333333 1. 0.5 ]
|
|
|
|
mean value: 0.71
|
|
|
|
key: train_precision
|
|
value: [1. 0.96551724 0.96428571 0.96551724 0.96666667 0.96774194
|
|
0.96666667 1. 0.96666667 1. ]
|
|
|
|
mean value: 0.9763062132528206
|
|
|
|
key: test_recall
|
|
value: [0.5 1. 0.75 0.5 1. 0.66666667
|
|
0.66666667 0.33333333 1. 0.33333333]
|
|
|
|
mean value: 0.675
|
|
|
|
key: train_recall
|
|
value: [0.96666667 0.93333333 0.9 0.93333333 0.93548387 0.96774194
|
|
0.93548387 1. 0.93548387 0.96774194]
|
|
|
|
mean value: 0.9475268817204301
|
|
|
|
key: test_roc_auc
|
|
value: [0.58333333 1. 0.70833333 0.75 1. 0.66666667
|
|
0.75 0.5 1. 0.58333333]
|
|
|
|
mean value: 0.7541666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.98333333 0.95757576 0.94090909 0.95757576 0.95848268 0.97478006
|
|
0.95865103 1. 0.95865103 0.98387097]
|
|
|
|
mean value: 0.9673829694797437
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 1. 0.5 0.5 1. 0.4
|
|
0.5 0.2 1. 0.25 ]
|
|
|
|
mean value: 0.5683333333333334
|
|
|
|
key: train_jcc
|
|
value: [0.96666667 0.90322581 0.87096774 0.90322581 0.90625 0.9375
|
|
0.90625 1. 0.90625 0.96774194]
|
|
|
|
mean value: 0.9268077956989247
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.25181079 0.24768829 0.20416474 0.19092536 0.20423031 0.29037714
|
|
0.19072556 0.19263363 0.19317007 0.19392085]
|
|
|
|
mean value: 0.215964674949646
|
|
|
|
key: score_time
|
|
value: [0.02449012 0.01991653 0.02192664 0.01992583 0.02087021 0.02371073
|
|
0.02168751 0.02192473 0.02187562 0.02086639]
|
|
|
|
mean value: 0.02171943187713623
|
|
|
|
key: test_mcc
|
|
value: [0.16666667 1. 0.40824829 0.61237244 1. 0.31622777
|
|
0.5 0. 1. 0.18898224]
|
|
|
|
mean value: 0.5192497395347776
|
|
|
|
key: train_mcc
|
|
value: [0.97437404 0.92244815 0.89661206 0.92244815 0.92361012 0.94956012
|
|
0.92410201 1. 0.92410201 0.97491581]
|
|
|
|
mean value: 0.941217245812007
|
|
|
|
key: test_accuracy
|
|
value: [0.6 1. 0.7 0.8 1. 0.66666667
|
|
0.77777778 0.55555556 1. 0.66666667]
|
|
|
|
mean value: 0.7766666666666666
|
|
|
|
key: train_accuracy
|
|
value: [0.98823529 0.96470588 0.95294118 0.96470588 0.96470588 0.97674419
|
|
0.96511628 1. 0.96511628 0.98837209]
|
|
|
|
mean value: 0.9730642954856361
|
|
|
|
key: test_fscore
|
|
value: [0.5 1. 0.66666667 0.66666667 1. 0.57142857
|
|
0.66666667 0.33333333 1. 0.4 ]
|
|
|
|
mean value: 0.6804761904761905
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./gid_8020.py:107: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_8020.py:110: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.98305085 0.94915254 0.93103448 0.94915254 0.95081967 0.96774194
|
|
0.95081967 1. 0.95081967 0.98360656]
|
|
|
|
mean value: 0.9616197924216374
|
|
|
|
key: test_precision
|
|
value: [0.5 1. 0.6 1. 1. 0.5
|
|
0.66666667 0.33333333 1. 0.5 ]
|
|
|
|
mean value: 0.71
|
|
|
|
key: train_precision
|
|
value: [1. 0.96551724 0.96428571 0.96551724 0.96666667 0.96774194
|
|
0.96666667 1. 0.96666667 1. ]
|
|
|
|
mean value: 0.9763062132528206
|
|
|
|
key: test_recall
|
|
value: [0.5 1. 0.75 0.5 1. 0.66666667
|
|
0.66666667 0.33333333 1. 0.33333333]
|
|
|
|
mean value: 0.675
|
|
|
|
key: train_recall
|
|
value: [0.96666667 0.93333333 0.9 0.93333333 0.93548387 0.96774194
|
|
0.93548387 1. 0.93548387 0.96774194]
|
|
|
|
mean value: 0.9475268817204301
|
|
|
|
key: test_roc_auc
|
|
value: [0.58333333 1. 0.70833333 0.75 1. 0.66666667
|
|
0.75 0.5 1. 0.58333333]
|
|
|
|
mean value: 0.7541666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.98333333 0.95757576 0.94090909 0.95757576 0.95848268 0.97478006
|
|
0.95865103 1. 0.95865103 0.98387097]
|
|
|
|
mean value: 0.9673829694797437
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 1. 0.5 0.5 1. 0.4
|
|
0.5 0.2 1. 0.25 ]
|
|
|
|
mean value: 0.5683333333333334
|
|
|
|
key: train_jcc
|
|
value: [0.96666667 0.90322581 0.87096774 0.90322581 0.90625 0.9375
|
|
0.90625 1. 0.90625 0.96774194]
|
|
|
|
mean value: 0.9268077956989247
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.07924438 0.03076434 0.02893591 0.03335929 0.03235078 0.02986813
|
|
0.02339029 0.03050017 0.03402185 0.02506852]
|
|
|
|
mean value: 0.0347503662109375
|
|
|
|
key: score_time
|
|
value: [0.02084374 0.01178765 0.012007 0.01207995 0.0117662 0.01195741
|
|
0.01165009 0.01196384 0.01176953 0.01173854]
|
|
|
|
mean value: 0.01275639533996582
|
|
|
|
key: test_mcc
|
|
value: [0.38095238 0.38095238 0.57735027 0.66666667 1. 0.50709255
|
|
0.19245009 0.16903085 0.70710678 0.70710678]
|
|
|
|
mean value: 0.5288708753646838
|
|
|
|
key: train_mcc
|
|
value: [0.90838671 0.85319865 0.87287156 0.87287156 0.8376106 0.87287156
|
|
0.92973479 0.92788641 0.92788641 0.87287156]
|
|
|
|
mean value: 0.8876189815128092
|
|
|
|
key: test_accuracy
|
|
value: [0.69230769 0.69230769 0.75 0.83333333 1. 0.75
|
|
0.58333333 0.58333333 0.83333333 0.83333333]
|
|
|
|
mean value: 0.7551282051282051
|
|
|
|
key: train_accuracy
|
|
value: [0.95412844 0.9266055 0.93636364 0.93636364 0.91818182 0.93636364
|
|
0.96363636 0.96363636 0.96363636 0.93636364]
|
|
|
|
mean value: 0.9435279399499583
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.66666667 0.66666667 0.83333333 1. 0.72727273
|
|
0.66666667 0.54545455 0.85714286 0.85714286]
|
|
|
|
mean value: 0.7534632034632034
|
|
|
|
key: train_fscore
|
|
value: [0.95327103 0.92727273 0.93577982 0.93577982 0.91588785 0.93577982
|
|
0.96226415 0.96428571 0.96428571 0.93577982]
|
|
|
|
mean value: 0.9430386451347271
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.66666667 1. 0.83333333 1. 0.8
|
|
0.55555556 0.6 0.75 0.75 ]
|
|
|
|
mean value: 0.766984126984127
|
|
|
|
key: train_precision
|
|
value: [0.96226415 0.92727273 0.94444444 0.94444444 0.94230769 0.94444444
|
|
1. 0.94736842 0.94736842 0.94444444]
|
|
|
|
mean value: 0.9504359190406857
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.66666667 0.5 0.83333333 1. 0.66666667
|
|
0.83333333 0.5 1. 1. ]
|
|
|
|
mean value: 0.7714285714285715
|
|
|
|
key: train_recall
|
|
value: [0.94444444 0.92727273 0.92727273 0.92727273 0.89090909 0.92727273
|
|
0.92727273 0.98181818 0.98181818 0.92727273]
|
|
|
|
mean value: 0.9362626262626262
|
|
|
|
key: test_roc_auc
|
|
value: [0.69047619 0.69047619 0.75 0.83333333 1. 0.75
|
|
0.58333333 0.58333333 0.83333333 0.83333333]
|
|
|
|
mean value: 0.7547619047619047
|
|
|
|
key: train_roc_auc
|
|
value: [0.9540404 0.92659933 0.93636364 0.93636364 0.91818182 0.93636364
|
|
0.96363636 0.96363636 0.96363636 0.93636364]
|
|
|
|
mean value: 0.9435185185185184
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.5 0.5 0.71428571 1. 0.57142857
|
|
0.5 0.375 0.75 0.75 ]
|
|
|
|
mean value: 0.6216269841269841
|
|
|
|
key: train_jcc
|
|
value: [0.91071429 0.86440678 0.87931034 0.87931034 0.84482759 0.87931034
|
|
0.92727273 0.93103448 0.93103448 0.87931034]
|
|
|
|
mean value: 0.8926531723682513
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.85057044 0.80782437 0.79492974 0.79358053 0.62308025 0.62692785
|
|
0.66689968 0.65987492 0.6033361 0.87334943]
|
|
|
|
mean value: 0.7300373315811157
|
|
|
|
key: score_time
|
|
value: [0.01589251 0.01189089 0.0155108 0.02012467 0.01325583 0.0130291
|
|
0.01516151 0.01375866 0.01316547 0.01353717]
|
|
|
|
mean value: 0.014532661437988282
|
|
|
|
key: test_mcc
|
|
value: [0.69047619 0.69047619 0.84515425 0.84515425 1. 0.84515425
|
|
0.4472136 0.50709255 0.57735027 0.70710678]
|
|
|
|
mean value: 0.7155178343851172
|
|
|
|
key: train_mcc
|
|
value: [1. 0.98181211 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9981812113622065
|
|
|
|
key: test_accuracy
|
|
value: [0.84615385 0.84615385 0.91666667 0.91666667 1. 0.91666667
|
|
0.66666667 0.75 0.75 0.83333333]
|
|
|
|
mean value: 0.8442307692307692
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.99082569 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9990825688073395
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.83333333 0.90909091 0.92307692 1. 0.90909091
|
|
0.75 0.76923077 0.8 0.85714286]
|
|
|
|
mean value: 0.8608108558108558
|
|
|
|
key: train_fscore
|
|
value: [1. 0.99099099 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9990990990990991
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.83333333 1. 0.85714286 1. 1.
|
|
0.6 0.71428571 0.66666667 0.75 ]
|
|
|
|
mean value: 0.8278571428571428
|
|
|
|
key: train_precision
|
|
value: [1. 0.98214286 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9982142857142857
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.83333333 0.83333333 1. 1. 0.83333333
|
|
1. 0.83333333 1. 1. ]
|
|
|
|
mean value: 0.9190476190476191
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8452381 0.8452381 0.91666667 0.91666667 1. 0.91666667
|
|
0.66666667 0.75 0.75 0.83333333]
|
|
|
|
mean value: 0.8440476190476192
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.99074074 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9990740740740741
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.71428571 0.83333333 0.85714286 1. 0.83333333
|
|
0.6 0.625 0.66666667 0.75 ]
|
|
|
|
mean value: 0.7629761904761905
|
|
|
|
key: train_jcc
|
|
value: [1. 0.98214286 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9982142857142857
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0124805 0.01003742 0.00925112 0.00835156 0.00836635 0.00841904
|
|
0.00879598 0.00886655 0.00840139 0.00848889]
|
|
|
|
mean value: 0.009145879745483398
|
|
|
|
key: score_time
|
|
value: [0.01441097 0.00922799 0.00902963 0.00841188 0.00836849 0.0083313
|
|
0.00862026 0.00835013 0.00843978 0.00841498]
|
|
|
|
mean value: 0.009160542488098144
|
|
|
|
key: test_mcc
|
|
value: [ 0.22537447 0.14085904 0.84515425 0.4472136 0. 0.19245009
|
|
-0.30151134 -0.16903085 0.30151134 0. ]
|
|
|
|
mean value: 0.16820205993950035
|
|
|
|
key: train_mcc
|
|
value: [0.46197053 0.40341862 0.60644963 0.44035242 0.40032038 0.50517938
|
|
0.44255326 0.55670221 0.47455475 0.41659779]
|
|
|
|
mean value: 0.4708098987112775
|
|
|
|
key: test_accuracy
|
|
value: [0.61538462 0.53846154 0.91666667 0.66666667 0.5 0.58333333
|
|
0.41666667 0.41666667 0.58333333 0.5 ]
|
|
|
|
mean value: 0.5737179487179487
|
|
|
|
key: train_accuracy
|
|
value: [0.70642202 0.66972477 0.8 0.7 0.68181818 0.72727273
|
|
0.70909091 0.77272727 0.70909091 0.69090909]
|
|
|
|
mean value: 0.7167055879899916
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.625 0.90909091 0.75 0.5 0.66666667
|
|
0.58823529 0.46153846 0.70588235 0.66666667]
|
|
|
|
mean value: 0.6578962703962704
|
|
|
|
key: train_fscore
|
|
value: [0.75757576 0.74285714 0.81355932 0.7518797 0.73684211 0.7761194
|
|
0.75 0.79338843 0.76470588 0.74242424]
|
|
|
|
mean value: 0.7629351984492402
|
|
|
|
key: test_precision
|
|
value: [0.6 0.5 1. 0.6 0.5 0.55555556
|
|
0.45454545 0.42857143 0.54545455 0.5 ]
|
|
|
|
mean value: 0.5684126984126984
|
|
|
|
key: train_precision
|
|
value: [0.64102564 0.61176471 0.76190476 0.64102564 0.62820513 0.65822785
|
|
0.65753425 0.72727273 0.64197531 0.63636364]
|
|
|
|
mean value: 0.6605299644998472
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.83333333 0.83333333 1. 0.5 0.83333333
|
|
0.83333333 0.5 1. 1. ]
|
|
|
|
mean value: 0.819047619047619
|
|
|
|
key: train_recall
|
|
value: [0.92592593 0.94545455 0.87272727 0.90909091 0.89090909 0.94545455
|
|
0.87272727 0.87272727 0.94545455 0.89090909]
|
|
|
|
mean value: 0.9071380471380471
|
|
|
|
key: test_roc_auc
|
|
value: [0.5952381 0.55952381 0.91666667 0.66666667 0.5 0.58333333
|
|
0.41666667 0.41666667 0.58333333 0.5 ]
|
|
|
|
mean value: 0.5738095238095239
|
|
|
|
key: train_roc_auc
|
|
value: [0.70841751 0.66717172 0.8 0.7 0.68181818 0.72727273
|
|
0.70909091 0.77272727 0.70909091 0.69090909]
|
|
|
|
mean value: 0.7166498316498316
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.45454545 0.83333333 0.6 0.33333333 0.5
|
|
0.41666667 0.3 0.54545455 0.5 ]
|
|
|
|
mean value: 0.5028787878787879
|
|
|
|
key: train_jcc
|
|
value: [0.6097561 0.59090909 0.68571429 0.60240964 0.58333333 0.63414634
|
|
0.6 0.65753425 0.61904762 0.59036145]
|
|
|
|
mean value: 0.6173212098941411
|
|
|
|
MCC on Blind test: 0.09
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00866246 0.00953245 0.00884199 0.00914764 0.00874949 0.00864816
|
|
0.00969696 0.0096283 0.0096209 0.00967336]
|
|
|
|
mean value: 0.009220170974731445
|
|
|
|
key: score_time
|
|
value: [0.0083909 0.00880027 0.00909829 0.00877166 0.00895333 0.00850987
|
|
0.00889373 0.00976372 0.00920582 0.00913 ]
|
|
|
|
mean value: 0.008951759338378907
|
|
|
|
key: test_mcc
|
|
value: [ 0.38095238 0.7200823 0.84515425 0.50709255 0. 0.16903085
|
|
0.4472136 -0.16903085 0.57735027 0.4472136 ]
|
|
|
|
mean value: 0.3925058948530645
|
|
|
|
key: train_mcc
|
|
value: [0.63370218 0.66442782 0.6965738 0.6520733 0.55290734 0.65991202
|
|
0.7167132 0.55290734 0.67995868 0.6520733 ]
|
|
|
|
mean value: 0.646124894909005
|
|
|
|
key: test_accuracy
|
|
value: [0.69230769 0.84615385 0.91666667 0.75 0.5 0.58333333
|
|
0.66666667 0.41666667 0.75 0.66666667]
|
|
|
|
mean value: 0.6788461538461539
|
|
|
|
key: train_accuracy
|
|
value: [0.80733945 0.82568807 0.84545455 0.81818182 0.77272727 0.82727273
|
|
0.85454545 0.77272727 0.83636364 0.81818182]
|
|
|
|
mean value: 0.8178482068390325
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.8 0.92307692 0.76923077 0.5 0.61538462
|
|
0.75 0.46153846 0.8 0.75 ]
|
|
|
|
mean value: 0.7083516483516483
|
|
|
|
key: train_fscore
|
|
value: [0.82644628 0.84297521 0.85470085 0.83606557 0.78991597 0.83760684
|
|
0.86440678 0.78991597 0.84745763 0.83606557]
|
|
|
|
mean value: 0.8325556667004752
|
|
|
|
key: test_precision
|
|
value: [0.71428571 1. 0.85714286 0.71428571 0.5 0.57142857
|
|
0.6 0.42857143 0.66666667 0.6 ]
|
|
|
|
mean value: 0.6652380952380952
|
|
|
|
key: train_precision
|
|
value: [0.74626866 0.77272727 0.80645161 0.76119403 0.734375 0.79032258
|
|
0.80952381 0.734375 0.79365079 0.76119403]
|
|
|
|
mean value: 0.7710082785868173
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.66666667 1. 0.83333333 0.5 0.66666667
|
|
1. 0.5 1. 1. ]
|
|
|
|
mean value: 0.7880952380952381
|
|
|
|
key: train_recall
|
|
value: [0.92592593 0.92727273 0.90909091 0.92727273 0.85454545 0.89090909
|
|
0.92727273 0.85454545 0.90909091 0.92727273]
|
|
|
|
mean value: 0.9053198653198653
|
|
|
|
key: test_roc_auc
|
|
value: [0.69047619 0.83333333 0.91666667 0.75 0.5 0.58333333
|
|
0.66666667 0.41666667 0.75 0.66666667]
|
|
|
|
mean value: 0.6773809523809524
|
|
|
|
key: train_roc_auc
|
|
value: [0.80841751 0.82474747 0.84545455 0.81818182 0.77272727 0.82727273
|
|
0.85454545 0.77272727 0.83636364 0.81818182]
|
|
|
|
mean value: 0.8178619528619528
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.66666667 0.85714286 0.625 0.33333333 0.44444444
|
|
0.6 0.3 0.66666667 0.6 ]
|
|
|
|
mean value: 0.5648809523809524
|
|
|
|
key: train_jcc
|
|
value: [0.70422535 0.72857143 0.74626866 0.71830986 0.65277778 0.72058824
|
|
0.76119403 0.65277778 0.73529412 0.71830986]
|
|
|
|
mean value: 0.713831709405786
|
|
|
|
MCC on Blind test: -0.15
|
|
|
|
Accuracy on Blind test: 0.42
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01272488 0.00841665 0.0083909 0.00904155 0.00945926 0.00858402
|
|
0.00938678 0.00953054 0.0085609 0.00950718]
|
|
|
|
mean value: 0.009360265731811524
|
|
|
|
key: score_time
|
|
value: [0.00989175 0.0092876 0.00953484 0.00939918 0.01030254 0.00957727
|
|
0.01058912 0.00944829 0.00953531 0.01539087]
|
|
|
|
mean value: 0.010295677185058593
|
|
|
|
key: test_mcc
|
|
value: [0.22537447 0.85714286 1. 0.19245009 0.35355339 0.50709255
|
|
0.50709255 0.16903085 0.4472136 0.57735027]
|
|
|
|
mean value: 0.4836300626703117
|
|
|
|
key: train_mcc
|
|
value: [0.69807573 0.64823028 0.6425396 0.72659596 0.68933463 0.71105713
|
|
0.62662831 0.68413024 0.6520733 0.63092918]
|
|
|
|
mean value: 0.6709594355892341
|
|
|
|
key: test_accuracy
|
|
value: [0.61538462 0.92307692 1. 0.58333333 0.66666667 0.75
|
|
0.75 0.58333333 0.66666667 0.75 ]
|
|
|
|
mean value: 0.7288461538461538
|
|
|
|
key: train_accuracy
|
|
value: [0.8440367 0.81651376 0.80909091 0.85454545 0.83636364 0.84545455
|
|
0.80909091 0.83636364 0.81818182 0.80909091]
|
|
|
|
mean value: 0.8278732276897415
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.92307692 1. 0.66666667 0.71428571 0.76923077
|
|
0.76923077 0.61538462 0.75 0.8 ]
|
|
|
|
mean value: 0.7713757810816634
|
|
|
|
key: train_fscore
|
|
value: [0.85470085 0.83606557 0.832 0.86885246 0.85245902 0.86178862
|
|
0.82352941 0.85 0.83606557 0.82644628]
|
|
|
|
mean value: 0.8441907788294295
|
|
|
|
key: test_precision
|
|
value: [0.6 0.85714286 1. 0.55555556 0.625 0.71428571
|
|
0.71428571 0.57142857 0.6 0.66666667]
|
|
|
|
mean value: 0.6904365079365079
|
|
|
|
key: train_precision
|
|
value: [0.79365079 0.76119403 0.74285714 0.79104478 0.7761194 0.77941176
|
|
0.765625 0.78461538 0.76119403 0.75757576]
|
|
|
|
mean value: 0.7713288082210931
|
|
|
|
key: test_recall
|
|
value: [0.85714286 1. 1. 0.83333333 0.83333333 0.83333333
|
|
0.83333333 0.66666667 1. 1. ]
|
|
|
|
mean value: 0.8857142857142857
|
|
|
|
key: train_recall
|
|
value: [0.92592593 0.92727273 0.94545455 0.96363636 0.94545455 0.96363636
|
|
0.89090909 0.92727273 0.92727273 0.90909091]
|
|
|
|
mean value: 0.9325925925925925
|
|
|
|
key: test_roc_auc
|
|
value: [0.5952381 0.92857143 1. 0.58333333 0.66666667 0.75
|
|
0.75 0.58333333 0.66666667 0.75 ]
|
|
|
|
mean value: 0.7273809523809525
|
|
|
|
key: train_roc_auc
|
|
value: [0.84478114 0.81548822 0.80909091 0.85454545 0.83636364 0.84545455
|
|
0.80909091 0.83636364 0.81818182 0.80909091]
|
|
|
|
mean value: 0.8278451178451178
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.85714286 1. 0.5 0.55555556 0.625
|
|
0.625 0.44444444 0.6 0.66666667]
|
|
|
|
mean value: 0.6419264069264069
|
|
|
|
key: train_jcc
|
|
value: [0.74626866 0.71830986 0.71232877 0.76811594 0.74285714 0.75714286
|
|
0.7 0.73913043 0.71830986 0.70422535]
|
|
|
|
mean value: 0.7306688871073835
|
|
|
|
MCC on Blind test: -0.26
|
|
|
|
Accuracy on Blind test: 0.38
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01118755 0.0109663 0.01057386 0.00992179 0.01036787 0.01079488
|
|
0.01068735 0.01067853 0.01073766 0.01000071]
|
|
|
|
mean value: 0.010591650009155273
|
|
|
|
key: score_time
|
|
value: [0.00984097 0.00979114 0.01014543 0.00923896 0.00875735 0.00881028
|
|
0.00956368 0.00914264 0.00931549 0.00970268]
|
|
|
|
mean value: 0.009430861473083496
|
|
|
|
key: test_mcc
|
|
value: [0.21957752 0.38095238 0.57735027 0.50709255 0.4472136 0.66666667
|
|
0.84515425 0.35355339 0.66666667 0.70710678]
|
|
|
|
mean value: 0.5371334074734165
|
|
|
|
key: train_mcc
|
|
value: [0.82131618 0.85319865 0.7823356 0.83984125 0.73720978 0.80119274
|
|
0.82697677 0.8376106 0.80013226 0.83984125]
|
|
|
|
mean value: 0.8139655097598576
|
|
|
|
key: test_accuracy
|
|
value: [0.61538462 0.69230769 0.75 0.75 0.66666667 0.83333333
|
|
0.91666667 0.66666667 0.83333333 0.83333333]
|
|
|
|
mean value: 0.7557692307692307
|
|
|
|
key: train_accuracy
|
|
value: [0.90825688 0.9266055 0.89090909 0.91818182 0.86363636 0.9
|
|
0.90909091 0.91818182 0.9 0.91818182]
|
|
|
|
mean value: 0.9053044203502919
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.66666667 0.66666667 0.72727273 0.5 0.83333333
|
|
0.92307692 0.6 0.83333333 0.85714286]
|
|
|
|
mean value: 0.7274159174159174
|
|
|
|
key: train_fscore
|
|
value: [0.90196078 0.92727273 0.88888889 0.91428571 0.85148515 0.89719626
|
|
0.90196078 0.91588785 0.89908257 0.91428571]
|
|
|
|
mean value: 0.901230644283222
|
|
|
|
key: test_precision
|
|
value: [0.625 0.66666667 1. 0.8 1. 0.83333333
|
|
0.85714286 0.75 0.83333333 0.75 ]
|
|
|
|
mean value: 0.8115476190476191
|
|
|
|
key: train_precision
|
|
value: [0.95833333 0.92727273 0.90566038 0.96 0.93478261 0.92307692
|
|
0.9787234 0.94230769 0.90740741 0.96 ]
|
|
|
|
mean value: 0.9397564473707545
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.66666667 0.5 0.66666667 0.33333333 0.83333333
|
|
1. 0.5 0.83333333 1. ]
|
|
|
|
mean value: 0.7047619047619048
|
|
|
|
key: train_recall
|
|
value: [0.85185185 0.92727273 0.87272727 0.87272727 0.78181818 0.87272727
|
|
0.83636364 0.89090909 0.89090909 0.87272727]
|
|
|
|
mean value: 0.867003367003367
|
|
|
|
key: test_roc_auc
|
|
value: [0.60714286 0.69047619 0.75 0.75 0.66666667 0.83333333
|
|
0.91666667 0.66666667 0.83333333 0.83333333]
|
|
|
|
mean value: 0.7547619047619047
|
|
|
|
key: train_roc_auc
|
|
value: [0.90774411 0.92659933 0.89090909 0.91818182 0.86363636 0.9
|
|
0.90909091 0.91818182 0.9 0.91818182]
|
|
|
|
mean value: 0.9052525252525252
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.5 0.5 0.57142857 0.33333333 0.71428571
|
|
0.85714286 0.42857143 0.71428571 0.75 ]
|
|
|
|
mean value: 0.5869047619047619
|
|
|
|
key: train_jcc
|
|
value: [0.82142857 0.86440678 0.8 0.84210526 0.74137931 0.81355932
|
|
0.82142857 0.84482759 0.81666667 0.84210526]
|
|
|
|
mean value: 0.8207907334086239
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.58330989 0.47644663 0.51219845 0.58515596 0.54524374 0.48944783
|
|
0.43725181 0.51959181 0.48605871 0.50858068]
|
|
|
|
mean value: 0.5143285512924194
|
|
|
|
key: score_time
|
|
value: [0.01218319 0.01217771 0.01212764 0.0122633 0.01239061 0.01242757
|
|
0.01242876 0.01242137 0.01214433 0.01211905]
|
|
|
|
mean value: 0.012268352508544921
|
|
|
|
key: test_mcc
|
|
value: [0.69047619 0.7200823 0.70710678 0.66666667 1. 0.50709255
|
|
0.35355339 0.33333333 0.70710678 0.70710678]
|
|
|
|
mean value: 0.6392524777289312
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.84615385 0.84615385 0.83333333 0.83333333 1. 0.75
|
|
0.66666667 0.66666667 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8108974358974359
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.8 0.8 0.83333333 1. 0.72727273
|
|
0.71428571 0.66666667 0.85714286 0.85714286]
|
|
|
|
mean value: 0.8112987012987013
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85714286 1. 1. 0.83333333 1. 0.8
|
|
0.625 0.66666667 0.75 0.75 ]
|
|
|
|
mean value: 0.8282142857142857
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.66666667 0.66666667 0.83333333 1. 0.66666667
|
|
0.83333333 0.66666667 1. 1. ]
|
|
|
|
mean value: 0.819047619047619
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8452381 0.83333333 0.83333333 0.83333333 1. 0.75
|
|
0.66666667 0.66666667 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8095238095238095
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.66666667 0.66666667 0.71428571 1. 0.57142857
|
|
0.55555556 0.5 0.75 0.75 ]
|
|
|
|
mean value: 0.6924603174603174
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01502514 0.01379395 0.01178455 0.01206064 0.01090789 0.0119524
|
|
0.01056623 0.0107739 0.0109334 0.01117158]
|
|
|
|
mean value: 0.011896967887878418
|
|
|
|
key: score_time
|
|
value: [0.01183581 0.00926375 0.00914383 0.00856352 0.00859094 0.00919175
|
|
0.00850463 0.00845695 0.00867391 0.00855637]
|
|
|
|
mean value: 0.009078145027160645
|
|
|
|
key: test_mcc
|
|
value: [0.85714286 0.85714286 0.70710678 1. 0.84515425 0.84515425
|
|
0.84515425 0.50709255 0.84515425 0.84515425]
|
|
|
|
mean value: 0.8154256321951955
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92307692 0.92307692 0.83333333 1. 0.91666667 0.91666667
|
|
0.91666667 0.75 0.91666667 0.91666667]
|
|
|
|
mean value: 0.9012820512820513
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.92307692 0.92307692 0.8 1. 0.90909091 0.92307692
|
|
0.90909091 0.76923077 0.92307692 0.92307692]
|
|
|
|
mean value: 0.9002797202797203
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.85714286 1. 1. 1. 0.85714286
|
|
1. 0.71428571 0.85714286 0.85714286]
|
|
|
|
mean value: 0.9142857142857143
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.85714286 1. 0.66666667 1. 0.83333333 1.
|
|
0.83333333 0.83333333 1. 1. ]
|
|
|
|
mean value: 0.9023809523809524
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.92857143 0.92857143 0.83333333 1. 0.91666667 0.91666667
|
|
0.91666667 0.75 0.91666667 0.91666667]
|
|
|
|
mean value: 0.9023809523809524
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.85714286 0.85714286 0.66666667 1. 0.83333333 0.85714286
|
|
0.83333333 0.625 0.85714286 0.85714286]
|
|
|
|
mean value: 0.8244047619047619
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.74
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09012508 0.08951235 0.08652997 0.09003401 0.08778095 0.09055924
|
|
0.08863854 0.08886576 0.08846521 0.08982038]
|
|
|
|
mean value: 0.08903315067291259
|
|
|
|
key: score_time
|
|
value: [0.01790714 0.01840067 0.01757932 0.01700807 0.01778626 0.01852798
|
|
0.01864576 0.01812649 0.01855206 0.01738501]
|
|
|
|
mean value: 0.01799187660217285
|
|
|
|
key: test_mcc
|
|
value: [0.38095238 0.85391256 0.70710678 1. 0.84515425 0.66666667
|
|
0.35355339 0.50709255 0.70710678 0.70710678]
|
|
|
|
mean value: 0.6728652153167557
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.69230769 0.92307692 0.83333333 1. 0.91666667 0.83333333
|
|
0.66666667 0.75 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8282051282051283
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.90909091 0.8 1. 0.90909091 0.83333333
|
|
0.71428571 0.72727273 0.85714286 0.85714286]
|
|
|
|
mean value: 0.8321645021645021
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.71428571 1. 1. 1. 1. 0.83333333
|
|
0.625 0.8 0.75 0.75 ]
|
|
|
|
mean value: 0.8472619047619048
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.83333333 0.66666667 1. 0.83333333 0.83333333
|
|
0.83333333 0.66666667 1. 1. ]
|
|
|
|
mean value: 0.8380952380952381
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.69047619 0.91666667 0.83333333 1. 0.91666667 0.83333333
|
|
0.66666667 0.75 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8273809523809524
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.83333333 0.66666667 1. 0.83333333 0.71428571
|
|
0.55555556 0.57142857 0.75 0.75 ]
|
|
|
|
mean value: 0.723015873015873
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.18
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00991583 0.0101099 0.00930738 0.0092597 0.0097456 0.01009154
|
|
0.00920773 0.00939512 0.07211041 0.00968695]
|
|
|
|
mean value: 0.01588301658630371
|
|
|
|
key: score_time
|
|
value: [0.00998712 0.00942707 0.00926399 0.00929952 0.00923967 0.00948429
|
|
0.00940251 0.0085876 0.00951457 0.00935388]
|
|
|
|
mean value: 0.009356021881103516
|
|
|
|
key: test_mcc
|
|
value: [0.05143445 0.38095238 0.66666667 0.35355339 0. 0.16903085
|
|
0.50709255 0.33333333 0.70710678 0.70710678]
|
|
|
|
mean value: 0.3876277187688927
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.53846154 0.69230769 0.83333333 0.66666667 0.5 0.58333333
|
|
0.75 0.66666667 0.83333333 0.83333333]
|
|
|
|
mean value: 0.6897435897435897
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.625 0.66666667 0.83333333 0.71428571 0.5 0.54545455
|
|
0.76923077 0.66666667 0.85714286 0.85714286]
|
|
|
|
mean value: 0.703492340992341
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.55555556 0.66666667 0.83333333 0.625 0.5 0.6
|
|
0.71428571 0.66666667 0.75 0.75 ]
|
|
|
|
mean value: 0.6661507936507937
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.66666667 0.83333333 0.83333333 0.5 0.5
|
|
0.83333333 0.66666667 1. 1. ]
|
|
|
|
mean value: 0.7547619047619047
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.52380952 0.69047619 0.83333333 0.66666667 0.5 0.58333333
|
|
0.75 0.66666667 0.83333333 0.83333333]
|
|
|
|
mean value: 0.6880952380952381
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.45454545 0.5 0.71428571 0.55555556 0.33333333 0.375
|
|
0.625 0.5 0.75 0.75 ]
|
|
|
|
mean value: 0.5557720057720058
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.09
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.11299348 1.11191034 1.09491897 1.09868526 1.09319234 1.09700537
|
|
1.08604312 1.09246111 1.08870125 1.09256554]
|
|
|
|
mean value: 1.0968476772308349
|
|
|
|
key: score_time
|
|
value: [0.08823061 0.09287357 0.09508395 0.0878036 0.0876739 0.08776522
|
|
0.09524059 0.08706617 0.08828473 0.08827615]
|
|
|
|
mean value: 0.08982985019683838
|
|
|
|
key: test_mcc
|
|
value: [0.73192505 0.85391256 0.84515425 1. 0.84515425 0.84515425
|
|
0.35355339 0.50709255 0.70710678 0.70710678]
|
|
|
|
mean value: 0.7396159888530395
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.84615385 0.92307692 0.91666667 1. 0.91666667 0.91666667
|
|
0.66666667 0.75 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8602564102564103
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.90909091 0.90909091 1. 0.90909091 0.90909091
|
|
0.71428571 0.72727273 0.85714286 0.85714286]
|
|
|
|
mean value: 0.8625541125541125
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 1. 1. 0.625 0.8 0.75 0.75 ]
|
|
|
|
mean value: 0.8925
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.83333333 0.83333333 1. 0.83333333 0.83333333
|
|
0.83333333 0.66666667 1. 1. ]
|
|
|
|
mean value: 0.8547619047619048
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.85714286 0.91666667 0.91666667 1. 0.91666667 0.91666667
|
|
0.66666667 0.75 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8607142857142858
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[0.71428571 0.83333333 0.83333333 1. 0.83333333 0.83333333
|
|
0.55555556 0.57142857 0.75 0.75 ]
|
|
|
|
mean value: 0.7674603174603175
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.8476913 0.88005686 0.92834973 0.86704898 0.88904858 0.90275621
|
|
0.86660075 0.88418841 0.8316164 0.88537502]
|
|
|
|
mean value: 0.8782732248306274
|
|
|
|
key: score_time
|
|
value: [0.21577764 0.22175455 0.19757748 0.23731112 0.23087597 0.23303604
|
|
0.142555 0.12179971 0.20090032 0.22992134]
|
|
|
|
mean value: 0.20315091609954833
|
|
|
|
key: test_mcc
|
|
value: [0.54761905 0.85391256 0.84515425 0.66666667 1. 0.84515425
|
|
0.35355339 0.57735027 0.70710678 0.57735027]
|
|
|
|
mean value: 0.6973867497731787
|
|
|
|
key: train_mcc
|
|
value: [1. 0.98181818 0.98198051 0.98198051 0.98198051 0.98198051
|
|
0.96427411 0.96427411 0.98198051 0.98198051]
|
|
|
|
mean value: 0.9802249440458227
|
|
|
|
key: test_accuracy
|
|
value: [0.76923077 0.92307692 0.91666667 0.83333333 1. 0.91666667
|
|
0.66666667 0.75 0.83333333 0.75 ]
|
|
|
|
mean value: 0.8358974358974359
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.99082569 0.99090909 0.99090909 0.99090909 0.99090909
|
|
0.98181818 0.98181818 0.99090909 0.99090909]
|
|
|
|
mean value: 0.9899916597164304
|
|
|
|
key: test_fscore
|
|
value: [0.76923077 0.90909091 0.90909091 0.83333333 1. 0.90909091
|
|
0.71428571 0.66666667 0.85714286 0.8 ]
|
|
|
|
mean value: 0.8367932067932068
|
|
|
|
key: train_fscore
|
|
value: [1. 0.99082569 0.99099099 0.99099099 0.99082569 0.99099099
|
|
0.98148148 0.98148148 0.99099099 0.99099099]
|
|
|
|
mean value: 0.9899569294064706
|
|
|
|
key: test_precision
|
|
value: [0.83333333 1. 1. 0.83333333 1. 1.
|
|
0.625 1. 0.75 0.66666667]
|
|
|
|
mean value: 0.8708333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.98214286 0.98214286 1. 0.98214286
|
|
1. 1. 0.98214286 0.98214286]
|
|
|
|
mean value: 0.9910714285714286
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.83333333 0.83333333 0.83333333 1. 0.83333333
|
|
0.83333333 0.5 1. 1. ]
|
|
|
|
mean value: 0.8380952380952381
|
|
|
|
key: train_recall
|
|
value: [1. 0.98181818 1. 1. 0.98181818 1.
|
|
0.96363636 0.96363636 1. 1. ]
|
|
|
|
mean value: 0.9890909090909091
|
|
|
|
key: test_roc_auc
|
|
value: [0.77380952 0.91666667 0.91666667 0.83333333 1. 0.91666667
|
|
0.66666667 0.75 0.83333333 0.75 ]
|
|
|
|
mean value: 0.8357142857142857
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.99090909 0.99090909 0.99090909 0.99090909 0.99090909
|
|
0.98181818 0.98181818 0.99090909 0.99090909]
|
|
|
|
mean value: 0.99
|
|
|
|
key: test_jcc
|
|
value: [0.625 0.83333333 0.83333333 0.71428571 1. 0.83333333
|
|
0.55555556 0.5 0.75 0.66666667]
|
|
|
|
mean value: 0.7311507936507937
|
|
|
|
key: train_jcc
|
|
value: [1. 0.98181818 0.98214286 0.98214286 0.98181818 0.98214286
|
|
0.96363636 0.96363636 0.98214286 0.98214286]
|
|
|
|
mean value: 0.9801623376623376
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02243757 0.00950193 0.00922942 0.00889492 0.00902367 0.00879312
|
|
0.00849319 0.00888419 0.00862622 0.00857925]
|
|
|
|
mean value: 0.010246348381042481
|
|
|
|
key: score_time
|
|
value: [0.01289821 0.00872231 0.0084548 0.00890398 0.00859475 0.00837207
|
|
0.00844049 0.0083189 0.00848746 0.00840735]
|
|
|
|
mean value: 0.00896003246307373
|
|
|
|
key: test_mcc
|
|
value: [ 0.38095238 0.7200823 0.84515425 0.50709255 0. 0.16903085
|
|
0.4472136 -0.16903085 0.57735027 0.4472136 ]
|
|
|
|
mean value: 0.3925058948530645
|
|
|
|
key: train_mcc
|
|
value: [0.63370218 0.66442782 0.6965738 0.6520733 0.55290734 0.65991202
|
|
0.7167132 0.55290734 0.67995868 0.6520733 ]
|
|
|
|
mean value: 0.646124894909005
|
|
|
|
key: test_accuracy
|
|
value: [0.69230769 0.84615385 0.91666667 0.75 0.5 0.58333333
|
|
0.66666667 0.41666667 0.75 0.66666667]
|
|
|
|
mean value: 0.6788461538461539
|
|
|
|
key: train_accuracy
|
|
value: [0.80733945 0.82568807 0.84545455 0.81818182 0.77272727 0.82727273
|
|
0.85454545 0.77272727 0.83636364 0.81818182]
|
|
|
|
mean value: 0.8178482068390325
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.8 0.92307692 0.76923077 0.5 0.61538462
|
|
0.75 0.46153846 0.8 0.75 ]
|
|
|
|
mean value: 0.7083516483516483
|
|
|
|
key: train_fscore
|
|
value: [0.82644628 0.84297521 0.85470085 0.83606557 0.78991597 0.83760684
|
|
0.86440678 0.78991597 0.84745763 0.83606557]
|
|
|
|
mean value: 0.8325556667004752
|
|
|
|
key: test_precision
|
|
value: [0.71428571 1. 0.85714286 0.71428571 0.5 0.57142857
|
|
0.6 0.42857143 0.66666667 0.6 ]
|
|
|
|
mean value: 0.6652380952380952
|
|
|
|
key: train_precision
|
|
value: [0.74626866 0.77272727 0.80645161 0.76119403 0.734375 0.79032258
|
|
0.80952381 0.734375 0.79365079 0.76119403]
|
|
|
|
mean value: 0.7710082785868173
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.66666667 1. 0.83333333 0.5 0.66666667
|
|
1. 0.5 1. 1. ]
|
|
|
|
mean value: 0.7880952380952381
|
|
|
|
key: train_recall
|
|
value: [0.92592593 0.92727273 0.90909091 0.92727273 0.85454545 0.89090909
|
|
0.92727273 0.85454545 0.90909091 0.92727273]
|
|
|
|
mean value: 0.9053198653198653
|
|
|
|
key: test_roc_auc
|
|
value: [0.69047619 0.83333333 0.91666667 0.75 0.5 0.58333333
|
|
0.66666667 0.41666667 0.75 0.66666667]
|
|
|
|
mean value: 0.6773809523809524
|
|
|
|
key: train_roc_auc
|
|
value: [0.80841751 0.82474747 0.84545455 0.81818182 0.77272727 0.82727273
|
|
0.85454545 0.77272727 0.83636364 0.81818182]
|
|
|
|
mean value: 0.8178619528619528
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.66666667 0.85714286 0.625 0.33333333 0.44444444
|
|
0.6 0.3 0.66666667 0.6 ]
|
|
|
|
mean value: 0.5648809523809524
|
|
|
|
key: train_jcc
|
|
value: [0.70422535 0.72857143 0.74626866 0.71830986 0.65277778 0.72058824
|
|
0.76119403 0.65277778 0.73529412 0.71830986]
|
|
|
|
mean value: 0.713831709405786
|
|
|
|
MCC on Blind test: -0.15
|
|
|
|
Accuracy on Blind test: 0.42
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.07693315 0.03835964 0.03832912 0.04274249 0.04329538 0.22246146
|
|
0.03759885 0.04203844 0.04555273 0.04880643]
|
|
|
|
mean value: 0.06361176967620849
|
|
|
|
key: score_time
|
|
value: [0.01022387 0.01000524 0.01051188 0.01060557 0.01058102 0.01180339
|
|
0.01105189 0.01110482 0.01286697 0.0107069 ]
|
|
|
|
mean value: 0.010946154594421387
|
|
|
|
key: test_mcc
|
|
value: [0.85714286 0.85714286 0.70710678 0.84515425 1. 0.84515425
|
|
0.84515425 1. 0.84515425 0.84515425]
|
|
|
|
mean value: 0.8647163769114845
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92307692 0.92307692 0.83333333 0.91666667 1. 0.91666667
|
|
0.91666667 1. 0.91666667 0.91666667]
|
|
|
|
mean value: 0.9262820512820513
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.92307692 0.92307692 0.8 0.92307692 1. 0.92307692
|
|
0.92307692 1. 0.92307692 0.92307692]
|
|
|
|
mean value: 0.9261538461538461
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.85714286 1. 0.85714286 1. 0.85714286
|
|
0.85714286 1. 0.85714286 0.85714286]
|
|
|
|
mean value: 0.9142857142857143
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.85714286 1. 0.66666667 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9523809523809523
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.92857143 0.92857143 0.83333333 0.91666667 1. 0.91666667
|
|
0.91666667 1. 0.91666667 0.91666667]
|
|
|
|
mean value: 0.9273809523809524
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.85714286 0.85714286 0.66666667 0.85714286 1. 0.85714286
|
|
0.85714286 1. 0.85714286 0.85714286]
|
|
|
|
mean value: 0.8666666666666666
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 1.0
|
|
|
|
Accuracy on Blind test: 1.0
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03949356 0.04584813 0.04769421 0.02098298 0.04267311 0.02022076
|
|
0.02039456 0.03640819 0.0488255 0.11853099]
|
|
|
|
mean value: 0.04410719871520996
|
|
|
|
key: score_time
|
|
value: [0.02225852 0.01988173 0.01393032 0.01173091 0.0118587 0.01173234
|
|
0.01175022 0.02219272 0.02250123 0.0117991 ]
|
|
|
|
mean value: 0.01596357822418213
|
|
|
|
key: test_mcc
|
|
value: [0.69047619 0.54761905 0.16903085 0.33333333 0.84515425 0.35355339
|
|
0.84515425 0.70710678 0.35355339 0.30151134]
|
|
|
|
mean value: 0.5146492838782166
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.84615385 0.76923077 0.58333333 0.66666667 0.91666667 0.66666667
|
|
0.91666667 0.83333333 0.66666667 0.58333333]
|
|
|
|
mean value: 0.7448717948717949
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.76923077 0.54545455 0.66666667 0.90909091 0.71428571
|
|
0.92307692 0.85714286 0.71428571 0.70588235]
|
|
|
|
mean value: 0.7662259309318132
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.71428571 0.6 0.66666667 1. 0.625
|
|
0.85714286 0.75 0.625 0.54545455]
|
|
|
|
mean value: 0.7240692640692641
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.83333333 0.5 0.66666667 0.83333333 0.83333333
|
|
1. 1. 0.83333333 1. ]
|
|
|
|
mean value: 0.8357142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8452381 0.77380952 0.58333333 0.66666667 0.91666667 0.66666667
|
|
0.91666667 0.83333333 0.66666667 0.58333333]
|
|
|
|
mean value: 0.7452380952380953
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.625 0.375 0.5 0.83333333 0.55555556
|
|
0.85714286 0.75 0.55555556 0.54545455]
|
|
|
|
mean value: 0.6347041847041847
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01210737 0.01156068 0.00966477 0.00961876 0.00909805 0.00907135
|
|
0.00956059 0.00903773 0.00853014 0.00888848]
|
|
|
|
mean value: 0.00971379280090332
|
|
|
|
key: score_time
|
|
value: [0.01141524 0.00934243 0.00950909 0.00906658 0.00899553 0.00867343
|
|
0.00900483 0.00826883 0.00837445 0.00829268]
|
|
|
|
mean value: 0.009094309806823731
|
|
|
|
key: test_mcc
|
|
value: [ 0.21957752 0.07142857 0.35355339 0.50709255 -0.16903085 0.57735027
|
|
0. 0. 0.84515425 0.57735027]
|
|
|
|
mean value: 0.29824759734344397
|
|
|
|
key: train_mcc
|
|
value: [0.61538588 0.56012128 0.54626778 0.49618606 0.39482334 0.54772256
|
|
0.40166321 0.54626778 0.5304385 0.47343208]
|
|
|
|
mean value: 0.5112308471867983
|
|
|
|
key: test_accuracy
|
|
value: [0.61538462 0.53846154 0.66666667 0.75 0.41666667 0.75
|
|
0.5 0.5 0.91666667 0.75 ]
|
|
|
|
mean value: 0.6403846153846153
|
|
|
|
key: train_accuracy
|
|
value: [0.80733945 0.77981651 0.77272727 0.74545455 0.69090909 0.77272727
|
|
0.7 0.77272727 0.76363636 0.73636364]
|
|
|
|
mean value: 0.7541701417848207
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.5 0.6 0.72727273 0.46153846 0.66666667
|
|
0.5 0.5 0.92307692 0.8 ]
|
|
|
|
mean value: 0.6345221445221445
|
|
|
|
key: train_fscore
|
|
value: [0.8 0.77777778 0.76635514 0.7254902 0.72580645 0.76190476
|
|
0.68571429 0.77876106 0.75 0.72897196]
|
|
|
|
mean value: 0.7500781637838801
|
|
|
|
key: test_precision
|
|
value: [0.625 0.5 0.75 0.8 0.42857143 1.
|
|
0.5 0.5 0.85714286 0.66666667]
|
|
|
|
mean value: 0.6627380952380952
|
|
|
|
key: train_precision
|
|
value: [0.82352941 0.79245283 0.78846154 0.78723404 0.65217391 0.8
|
|
0.72 0.75862069 0.79591837 0.75 ]
|
|
|
|
mean value: 0.7668390793013704
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.5 0.5 0.66666667 0.5 0.5
|
|
0.5 0.5 1. 1. ]
|
|
|
|
mean value: 0.638095238095238
|
|
|
|
key: train_recall
|
|
value: [0.77777778 0.76363636 0.74545455 0.67272727 0.81818182 0.72727273
|
|
0.65454545 0.8 0.70909091 0.70909091]
|
|
|
|
mean value: 0.7377777777777778
|
|
|
|
key: test_roc_auc
|
|
value: [0.60714286 0.53571429 0.66666667 0.75 0.41666667 0.75
|
|
0.5 0.5 0.91666667 0.75 ]
|
|
|
|
mean value: 0.6392857142857142
|
|
|
|
key: train_roc_auc
|
|
value: [0.80707071 0.77996633 0.77272727 0.74545455 0.69090909 0.77272727
|
|
0.7 0.77272727 0.76363636 0.73636364]
|
|
|
|
mean value: 0.7541582491582491
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.33333333 0.42857143 0.57142857 0.3 0.5
|
|
0.33333333 0.33333333 0.85714286 0.66666667]
|
|
|
|
mean value: 0.48238095238095235
|
|
|
|
key: train_jcc
|
|
value: [0.66666667 0.63636364 0.62121212 0.56923077 0.56962025 0.61538462
|
|
0.52173913 0.63768116 0.6 0.57352941]
|
|
|
|
mean value: 0.6011427763642144
|
|
|
|
MCC on Blind test: 0.18
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01046467 0.01330042 0.01385999 0.01424932 0.01497936 0.01474452
|
|
0.01537991 0.01384139 0.01357126 0.01280785]
|
|
|
|
mean value: 0.013719868659973145
|
|
|
|
key: score_time
|
|
value: [0.00873303 0.01133561 0.01135492 0.01138973 0.01143742 0.01147223
|
|
0.01137185 0.01175857 0.01142645 0.01143861]
|
|
|
|
mean value: 0.011171841621398925
|
|
|
|
key: test_mcc
|
|
value: [0.53674504 0.54761905 0.57735027 0.66666667 1. 0.70710678
|
|
0.4472136 0.50709255 0.4472136 0.57735027]
|
|
|
|
mean value: 0.6014357817810232
|
|
|
|
key: train_mcc
|
|
value: [0.91216737 0.90841751 0.94561086 0.9104463 0.86373129 0.94686415
|
|
1. 1. 0.78590525 0.78590525]
|
|
|
|
mean value: 0.9059047976338707
|
|
|
|
key: test_accuracy
|
|
value: [0.76923077 0.76923077 0.75 0.83333333 1. 0.83333333
|
|
0.66666667 0.75 0.66666667 0.75 ]
|
|
|
|
mean value: 0.7788461538461539
|
|
|
|
key: train_accuracy
|
|
value: [0.95412844 0.95412844 0.97272727 0.95454545 0.92727273 0.97272727
|
|
1. 1. 0.88181818 0.88181818]
|
|
|
|
mean value: 0.9499165971643037
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.76923077 0.66666667 0.83333333 1. 0.8
|
|
0.75 0.76923077 0.75 0.8 ]
|
|
|
|
mean value: 0.7938461538461539
|
|
|
|
key: train_fscore
|
|
value: [0.95575221 0.95412844 0.97247706 0.95327103 0.93220339 0.97196262
|
|
1. 1. 0.89430894 0.89430894]
|
|
|
|
mean value: 0.952841263784572
|
|
|
|
key: test_precision
|
|
value: [0.75 0.71428571 1. 0.83333333 1. 1.
|
|
0.6 0.71428571 0.6 0.66666667]
|
|
|
|
mean value: 0.7878571428571428
|
|
|
|
key: train_precision
|
|
value: [0.91525424 0.96296296 0.98148148 0.98076923 0.87301587 1.
|
|
1. 1. 0.80882353 0.80882353]
|
|
|
|
mean value: 0.9331130844341213
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.83333333 0.5 0.83333333 1. 0.66666667
|
|
1. 0.83333333 1. 1. ]
|
|
|
|
mean value: 0.8523809523809524
|
|
|
|
key: train_recall
|
|
value: [1. 0.94545455 0.96363636 0.92727273 1. 0.94545455
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9781818181818182
|
|
|
|
key: test_roc_auc
|
|
value: [0.76190476 0.77380952 0.75 0.83333333 1. 0.83333333
|
|
0.66666667 0.75 0.66666667 0.75 ]
|
|
|
|
mean value: 0.7785714285714287
|
|
|
|
key: train_roc_auc
|
|
value: [0.95454545 0.95420875 0.97272727 0.95454545 0.92727273 0.97272727
|
|
1. 1. 0.88181818 0.88181818]
|
|
|
|
mean value: 0.9499663299663299
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.625 0.5 0.71428571 1. 0.66666667
|
|
0.6 0.625 0.6 0.66666667]
|
|
|
|
mean value: 0.6664285714285714
|
|
|
|
key: train_jcc
|
|
value: [0.91525424 0.9122807 0.94642857 0.91071429 0.87301587 0.94545455
|
|
1. 1. 0.80882353 0.80882353]
|
|
|
|
mean value: 0.9120795273479326
|
|
|
|
MCC on Blind test: 0.49
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01306057 0.01292706 0.01352406 0.01307344 0.01314592 0.01318073
|
|
0.01261377 0.01300907 0.01313162 0.01340365]
|
|
|
|
mean value: 0.013106989860534667
|
|
|
|
key: score_time
|
|
value: [0.01061058 0.01146269 0.01154089 0.01144314 0.01147461 0.01149225
|
|
0.01151586 0.01148248 0.0114913 0.01152301]
|
|
|
|
mean value: 0.011403679847717285
|
|
|
|
key: test_mcc
|
|
value: [0.22537447 0.38575837 0.70710678 0.66666667 0.70710678 0.70710678
|
|
0.50709255 0. 0.57735027 0.70710678]
|
|
|
|
mean value: 0.5190669456272426
|
|
|
|
key: train_mcc
|
|
value: [0.7249001 0.82566622 1. 0.87402845 0.89625816 0.94561086
|
|
0.87988269 0.58434871 0.91287093 0.89625816]
|
|
|
|
mean value: 0.8539824272863392
|
|
|
|
key: test_accuracy
|
|
value: [0.61538462 0.69230769 0.83333333 0.83333333 0.83333333 0.83333333
|
|
0.75 0.5 0.75 0.83333333]
|
|
|
|
mean value: 0.7474358974358974
|
|
|
|
key: train_accuracy
|
|
value: [0.8440367 0.90825688 1. 0.93636364 0.94545455 0.97272727
|
|
0.93636364 0.75454545 0.95454545 0.94545455]
|
|
|
|
mean value: 0.9197748123436197
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.6 0.8 0.83333333 0.8 0.8
|
|
0.76923077 0.25 0.8 0.85714286]
|
|
|
|
mean value: 0.7215589312648136
|
|
|
|
key: train_fscore
|
|
value: [0.864 0.90196078 1. 0.9380531 0.94230769 0.97297297
|
|
0.93203883 0.6746988 0.95652174 0.94827586]
|
|
|
|
mean value: 0.9130829778271103
|
|
|
|
key: test_precision
|
|
value: [0.6 0.75 1. 0.83333333 1. 1.
|
|
0.71428571 0.5 0.66666667 0.75 ]
|
|
|
|
mean value: 0.7814285714285715
|
|
|
|
key: train_precision
|
|
value: [0.76056338 0.9787234 1. 0.9137931 1. 0.96428571
|
|
1. 1. 0.91666667 0.90163934]
|
|
|
|
mean value: 0.9435671613199961
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.5 0.66666667 0.83333333 0.66666667 0.66666667
|
|
0.83333333 0.16666667 1. 1. ]
|
|
|
|
mean value: 0.719047619047619
|
|
|
|
key: train_recall
|
|
value: [1. 0.83636364 1. 0.96363636 0.89090909 0.98181818
|
|
0.87272727 0.50909091 1. 1. ]
|
|
|
|
mean value: 0.9054545454545454
|
|
|
|
key: test_roc_auc
|
|
value: [0.5952381 0.67857143 0.83333333 0.83333333 0.83333333 0.83333333
|
|
0.75 0.5 0.75 0.83333333]
|
|
|
|
mean value: 0.7440476190476191
|
|
|
|
key: train_roc_auc
|
|
value: [0.84545455 0.90892256 1. 0.93636364 0.94545455 0.97272727
|
|
0.93636364 0.75454545 0.95454545 0.94545455]
|
|
|
|
mean value: 0.9199831649831649
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.42857143 0.66666667 0.71428571 0.66666667 0.66666667
|
|
0.625 0.14285714 0.66666667 0.75 ]
|
|
|
|
mean value: 0.5872835497835498
|
|
|
|
key: train_jcc
|
|
value: [0.76056338 0.82142857 1. 0.88333333 0.89090909 0.94736842
|
|
0.87272727 0.50909091 0.91666667 0.90163934]
|
|
|
|
mean value: 0.850372698975246
|
|
|
|
MCC on Blind test: 0.31
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.10641098 0.09527111 0.09607506 0.09576178 0.09663844 0.10044956
|
|
0.10395312 0.10345387 0.10308766 0.10267711]
|
|
|
|
mean value: 0.10037786960601806
|
|
|
|
key: score_time
|
|
value: [0.01469731 0.01537108 0.01479745 0.01546907 0.01498413 0.0153513
|
|
0.01544666 0.01571178 0.01654768 0.01607656]
|
|
|
|
mean value: 0.015445303916931153
|
|
|
|
key: test_mcc
|
|
value: [0.85714286 0.85714286 0.70710678 1. 0.84515425 0.84515425
|
|
0.70710678 0.84515425 0.57735027 0.57735027]
|
|
|
|
mean value: 0.7818662579223611
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92307692 0.92307692 0.83333333 1. 0.91666667 0.91666667
|
|
0.83333333 0.91666667 0.75 0.75 ]
|
|
|
|
mean value: 0.8762820512820513
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.92307692 0.92307692 0.8 1. 0.90909091 0.92307692
|
|
0.85714286 0.90909091 0.8 0.8 ]
|
|
|
|
mean value: 0.8844555444555444
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.85714286 1. 1. 1. 0.85714286
|
|
0.75 1. 0.66666667 0.66666667]
|
|
|
|
mean value: 0.8797619047619047
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.85714286 1. 0.66666667 1. 0.83333333 1.
|
|
1. 0.83333333 1. 1. ]
|
|
|
|
mean value: 0.919047619047619
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.92857143 0.92857143 0.83333333 1. 0.91666667 0.91666667
|
|
0.83333333 0.91666667 0.75 0.75 ]
|
|
|
|
mean value: 0.8773809523809524
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.85714286 0.85714286 0.66666667 1. 0.83333333 0.85714286
|
|
0.75 0.83333333 0.66666667 0.66666667]
|
|
|
|
mean value: 0.7988095238095237
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03481913 0.03135228 0.04547691 0.04148483 0.02830768 0.03416061
|
|
0.02987289 0.02936101 0.02780604 0.0321207 ]
|
|
|
|
mean value: 0.03347620964050293
|
|
|
|
key: score_time
|
|
value: [0.01685143 0.02403641 0.02301741 0.02724481 0.02113366 0.02177835
|
|
0.0162611 0.0184648 0.01792693 0.02597737]
|
|
|
|
mean value: 0.02126922607421875
|
|
|
|
key: test_mcc
|
|
value: [1. 0.85714286 0.70710678 1. 0.84515425 1.
|
|
1. 0.66666667 0.84515425 0.84515425]
|
|
|
|
mean value: 0.8766379069181621
|
|
|
|
key: train_mcc
|
|
value: [0.98181211 0.98181818 0.98198051 1. 0.96427411 1.
|
|
0.98198051 0.98198051 1. 1. ]
|
|
|
|
mean value: 0.987384592476027
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.92307692 0.83333333 1. 0.91666667 1.
|
|
1. 0.83333333 0.91666667 0.91666667]
|
|
|
|
mean value: 0.933974358974359
|
|
|
|
key: train_accuracy
|
|
value: [0.99082569 0.99082569 0.99090909 1. 0.98181818 1.
|
|
0.99090909 0.99090909 1. 1. ]
|
|
|
|
mean value: 0.9936196830692243
|
|
|
|
key: test_fscore
|
|
value: [1. 0.92307692 0.8 1. 0.90909091 1.
|
|
1. 0.83333333 0.92307692 0.92307692]
|
|
|
|
mean value: 0.9311655011655011
|
|
|
|
key: train_fscore
|
|
value: [0.99065421 0.99082569 0.99082569 1. 0.98148148 1.
|
|
0.99082569 0.99082569 1. 1. ]
|
|
|
|
mean value: 0.9935438439382536
|
|
|
|
key: test_precision
|
|
value: [1. 0.85714286 1. 1. 1. 1.
|
|
1. 0.83333333 0.85714286 0.85714286]
|
|
|
|
mean value: 0.9404761904761905
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.66666667 1. 0.83333333 1.
|
|
1. 0.83333333 1. 1. ]
|
|
|
|
mean value: 0.9333333333333333
|
|
|
|
key: train_recall
|
|
value: [0.98148148 0.98181818 0.98181818 1. 0.96363636 1.
|
|
0.98181818 0.98181818 1. 1. ]
|
|
|
|
mean value: 0.9872390572390572
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.92857143 0.83333333 1. 0.91666667 1.
|
|
1. 0.83333333 0.91666667 0.91666667]
|
|
|
|
mean value: 0.9345238095238095
|
|
|
|
key: train_roc_auc
|
|
value: [0.99074074 0.99090909 0.99090909 1. 0.98181818 1.
|
|
0.99090909 0.99090909 1. 1. ]
|
|
|
|
mean value: 0.9936195286195286
|
|
|
|
key: test_jcc
|
|
value: [1. 0.85714286 0.66666667 1. 0.83333333 1.
|
|
1. 0.71428571 0.85714286 0.85714286]
|
|
|
|
mean value: 0.8785714285714286
|
|
|
|
key: train_jcc
|
|
value: [0.98148148 0.98181818 0.98181818 1. 0.96363636 1.
|
|
0.98181818 0.98181818 1. 1. ]
|
|
|
|
mean value: 0.9872390572390572
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03032255 0.04664707 0.03462601 0.0402813 0.04814458 0.01796746
|
|
0.01761532 0.01751995 0.04073024 0.01744914]
|
|
|
|
mean value: 0.031130361557006835
|
|
|
|
key: score_time
|
|
value: [0.02038479 0.02008438 0.02181888 0.01989651 0.01219678 0.01192284
|
|
0.01208115 0.01213479 0.0194757 0.01214719]
|
|
|
|
mean value: 0.01621429920196533
|
|
|
|
key: test_mcc
|
|
value: [0.7200823 0.73192505 0.84515425 0.57735027 0.50709255 0.16903085
|
|
0.70710678 0.66666667 0.4472136 0.57735027]
|
|
|
|
mean value: 0.5948972594778249
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.84615385 0.84615385 0.91666667 0.75 0.75 0.58333333
|
|
0.83333333 0.83333333 0.66666667 0.75 ]
|
|
|
|
mean value: 0.7775641025641026
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.875 0.85714286 0.90909091 0.8 0.76923077 0.61538462
|
|
0.85714286 0.83333333 0.75 0.8 ]
|
|
|
|
mean value: 0.8066325341325341
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.75 1. 0.66666667 0.71428571 0.57142857
|
|
0.75 0.83333333 0.6 0.66666667]
|
|
|
|
mean value: 0.733015873015873
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.83333333 1. 0.83333333 0.66666667
|
|
1. 0.83333333 1. 1. ]
|
|
|
|
mean value: 0.9166666666666666
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.85714286 0.91666667 0.75 0.75 0.58333333
|
|
0.83333333 0.83333333 0.66666667 0.75 ]
|
|
|
|
mean value: 0.7773809523809524
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.77777778 0.75 0.83333333 0.66666667 0.625 0.44444444
|
|
0.75 0.71428571 0.6 0.66666667]
|
|
|
|
mean value: 0.6828174603174603
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.26
|
|
|
|
Accuracy on Blind test: 0.38
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.24948788 0.19161344 0.19149232 0.22366023 0.25529861 0.22087312
|
|
0.22636342 0.25814033 0.26029038 0.25357676]
|
|
|
|
mean value: 0.2330796480178833
|
|
|
|
key: score_time
|
|
value: [0.00947237 0.00913167 0.00906587 0.00899434 0.00944662 0.00882816
|
|
0.00939107 0.00938201 0.00915408 0.00993562]
|
|
|
|
mean value: 0.009280180931091309
|
|
|
|
key: test_mcc
|
|
value: [0.69047619 0.85714286 0.70710678 1. 1. 0.84515425
|
|
0.84515425 0.66666667 0.84515425 0.84515425]
|
|
|
|
mean value: 0.8302009514386328
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.84615385 0.92307692 0.83333333 1. 1. 0.91666667
|
|
0.91666667 0.83333333 0.91666667 0.91666667]
|
|
|
|
mean value: 0.9102564102564102
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.92307692 0.8 1. 1. 0.92307692
|
|
0.90909091 0.83333333 0.92307692 0.92307692]
|
|
|
|
mean value: 0.9091874791874792
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.85714286 1. 1. 1. 0.85714286
|
|
1. 0.83333333 0.85714286 0.85714286]
|
|
|
|
mean value: 0.9119047619047619
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.85714286 1. 0.66666667 1. 1. 1.
|
|
0.83333333 0.83333333 1. 1. ]
|
|
|
|
mean value: 0.919047619047619
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8452381 0.92857143 0.83333333 1. 1. 0.91666667
|
|
0.91666667 0.83333333 0.91666667 0.91666667]
|
|
|
|
mean value: 0.9107142857142857
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.85714286 0.66666667 1. 1. 0.85714286
|
|
0.83333333 0.71428571 0.85714286 0.85714286]
|
|
|
|
mean value: 0.8392857142857143
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01531339 0.01948166 0.01559758 0.01585889 0.03095436 0.02038884
|
|
0.02338004 0.03243804 0.01676536 0.09134436]
|
|
|
|
mean value: 0.02815225124359131
|
|
|
|
key: score_time
|
|
value: [0.01224422 0.01242447 0.01191473 0.01191139 0.01261568 0.01313353
|
|
0.01212215 0.01214504 0.01268935 0.0126431 ]
|
|
|
|
mean value: 0.012384366989135743
|
|
|
|
key: test_mcc
|
|
value: [0.38575837 0.73192505 0.50709255 0.57735027 0.35355339 0.50709255
|
|
0.57735027 0.84515425 0.30151134 0.84515425]
|
|
|
|
mean value: 0.5631942318298172
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.69230769 0.84615385 0.75 0.75 0.66666667 0.75
|
|
0.75 0.91666667 0.58333333 0.91666667]
|
|
|
|
mean value: 0.7621794871794871
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.85714286 0.72727273 0.8 0.71428571 0.76923077
|
|
0.8 0.92307692 0.70588235 0.92307692]
|
|
|
|
mean value: 0.796996826702709
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.75 0.8 0.66666667 0.625 0.71428571
|
|
0.66666667 0.85714286 0.54545455 0.85714286]
|
|
|
|
mean value: 0.7149025974025974
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.85714286 1. 0.66666667 1. 0.83333333 0.83333333
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.919047619047619
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.67857143 0.85714286 0.75 0.75 0.66666667 0.75
|
|
0.75 0.91666667 0.58333333 0.91666667]
|
|
|
|
mean value: 0.761904761904762
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.75 0.57142857 0.66666667 0.55555556 0.625
|
|
0.66666667 0.85714286 0.54545455 0.85714286]
|
|
|
|
mean value: 0.669505772005772
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04693675 0.02648091 0.03335214 0.03273988 0.03322816 0.02680969
|
|
0.04480362 0.03316164 0.05596161 0.0371058 ]
|
|
|
|
mean value: 0.037058019638061525
|
|
|
|
key: score_time
|
|
value: [0.02066731 0.01168585 0.02279043 0.02336931 0.02314258 0.02253413
|
|
0.02336025 0.02933931 0.02206159 0.01648211]
|
|
|
|
mean value: 0.021543288230895997
|
|
|
|
key: test_mcc
|
|
value: [0.54761905 0.54761905 0.70710678 0.84515425 1. 1.
|
|
0.4472136 0.50709255 0.70710678 0.70710678]
|
|
|
|
mean value: 0.7016018841863323
|
|
|
|
key: train_mcc
|
|
value: [0.98181818 0.98181211 0.98198051 0.98198051 0.98198051 0.98198051
|
|
1. 0.98198051 0.98198051 0.96363636]
|
|
|
|
mean value: 0.9819149695448404
|
|
|
|
key: test_accuracy
|
|
value: [0.76923077 0.76923077 0.83333333 0.91666667 1. 1.
|
|
0.66666667 0.75 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8371794871794872
|
|
|
|
key: train_accuracy
|
|
value: [0.99082569 0.99082569 0.99090909 0.99090909 0.99090909 0.99090909
|
|
1. 0.99090909 0.99090909 0.98181818]
|
|
|
|
mean value: 0.9908924103419516
|
|
|
|
key: test_fscore
|
|
value: [0.76923077 0.76923077 0.8 0.92307692 1. 1.
|
|
0.75 0.76923077 0.85714286 0.85714286]
|
|
|
|
mean value: 0.8495054945054945
|
|
|
|
key: train_fscore
|
|
value: [0.99082569 0.99099099 0.99099099 0.99099099 0.99099099 0.99099099
|
|
1. 0.99099099 0.99099099 0.98181818]
|
|
|
|
mean value: 0.9909580806828512
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.71428571 1. 0.85714286 1. 1.
|
|
0.6 0.71428571 0.75 0.75 ]
|
|
|
|
mean value: 0.8219047619047619
|
|
|
|
key: train_precision
|
|
value: [0.98181818 0.98214286 0.98214286 0.98214286 0.98214286 0.98214286
|
|
1. 0.98214286 0.98214286 0.98181818]
|
|
|
|
mean value: 0.9838636363636364
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.83333333 0.66666667 1. 1. 1.
|
|
1. 0.83333333 1. 1. ]
|
|
|
|
mean value: 0.9047619047619048
|
|
|
|
key: train_recall
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./gid_8020.py:128: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_8020.py:131: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.98181818]
|
|
|
|
mean value: 0.9981818181818182
|
|
|
|
key: test_roc_auc
|
|
value: [0.77380952 0.77380952 0.83333333 0.91666667 1. 1.
|
|
0.66666667 0.75 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8380952380952381
|
|
|
|
key: train_roc_auc
|
|
value: [0.99090909 0.99074074 0.99090909 0.99090909 0.99090909 0.99090909
|
|
1. 0.99090909 0.99090909 0.98181818]
|
|
|
|
mean value: 0.9908922558922558
|
|
|
|
key: test_jcc
|
|
value: [0.625 0.625 0.66666667 0.85714286 1. 1.
|
|
0.6 0.625 0.75 0.75 ]
|
|
|
|
mean value: 0.7498809523809524
|
|
|
|
key: train_jcc
|
|
value: [0.98181818 0.98214286 0.98214286 0.98214286 0.98214286 0.98214286
|
|
1. 0.98214286 0.98214286 0.96428571]
|
|
|
|
mean value: 0.9821103896103895
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.09976268 0.14996791 0.21591544 0.19544363 0.21583414 0.27297521
|
|
0.20447612 0.20294356 0.21476102 0.20324492]
|
|
|
|
mean value: 0.19753246307373046
|
|
|
|
key: score_time
|
|
value: [0.01179314 0.02238917 0.02303171 0.02025199 0.02110195 0.02333355
|
|
0.0235424 0.02158618 0.02197886 0.02189255]
|
|
|
|
mean value: 0.021090149879455566
|
|
|
|
key: test_mcc
|
|
value: [0.54761905 0.54761905 0.70710678 0.84515425 1. 1.
|
|
0.4472136 0.50709255 0.70710678 0.70710678]
|
|
|
|
mean value: 0.7016018841863323
|
|
|
|
key: train_mcc
|
|
value: [0.98181818 0.98181211 0.98198051 0.98198051 0.98198051 0.98198051
|
|
1. 0.98198051 0.98198051 0.96363636]
|
|
|
|
mean value: 0.9819149695448404
|
|
|
|
key: test_accuracy
|
|
value: [0.76923077 0.76923077 0.83333333 0.91666667 1. 1.
|
|
0.66666667 0.75 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8371794871794872
|
|
|
|
key: train_accuracy
|
|
value: [0.99082569 0.99082569 0.99090909 0.99090909 0.99090909 0.99090909
|
|
1. 0.99090909 0.99090909 0.98181818]
|
|
|
|
mean value: 0.9908924103419516
|
|
|
|
key: test_fscore
|
|
value: [0.76923077 0.76923077 0.8 0.92307692 1. 1.
|
|
0.75 0.76923077 0.85714286 0.85714286]
|
|
|
|
mean value: 0.8495054945054945
|
|
|
|
key: train_fscore
|
|
value: [0.99082569 0.99099099 0.99099099 0.99099099 0.99099099 0.99099099
|
|
1. 0.99099099 0.99099099 0.98181818]
|
|
|
|
mean value: 0.9909580806828512
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.71428571 1. 0.85714286 1. 1.
|
|
0.6 0.71428571 0.75 0.75 ]
|
|
|
|
mean value: 0.8219047619047619
|
|
|
|
key: train_precision
|
|
value: [0.98181818 0.98214286 0.98214286 0.98214286 0.98214286 0.98214286
|
|
1. 0.98214286 0.98214286 0.98181818]
|
|
|
|
mean value: 0.9838636363636364
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.83333333 0.66666667 1. 1. 1.
|
|
1. 0.83333333 1. 1. ]
|
|
|
|
mean value: 0.9047619047619048
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.98181818]
|
|
|
|
mean value: 0.9981818181818182
|
|
|
|
key: test_roc_auc
|
|
value: [0.77380952 0.77380952 0.83333333 0.91666667 1. 1.
|
|
0.66666667 0.75 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8380952380952381
|
|
|
|
key: train_roc_auc
|
|
value: [0.99090909 0.99074074 0.99090909 0.99090909 0.99090909 0.99090909
|
|
1. 0.99090909 0.99090909 0.98181818]
|
|
|
|
mean value: 0.9908922558922558
|
|
|
|
key: test_jcc
|
|
value: [0.625 0.625 0.66666667 0.85714286 1. 1.
|
|
0.6 0.625 0.75 0.75 ]
|
|
|
|
mean value: 0.7498809523809524
|
|
|
|
key: train_jcc
|
|
value: [0.98181818 0.98214286 0.98214286 0.98214286 0.98214286 0.98214286
|
|
1. 0.98214286 0.98214286 0.96428571]
|
|
|
|
mean value: 0.9821103896103895
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02341318 0.02947307 0.02594304 0.03807855 0.0279057 0.02738404
|
|
0.02577353 0.02728701 0.02514648 0.0268805 ]
|
|
|
|
mean value: 0.027728509902954102
|
|
|
|
key: score_time
|
|
value: [0.01184964 0.01186442 0.01181912 0.01211095 0.01173472 0.01190186
|
|
0.01171422 0.01183414 0.01187754 0.01192617]
|
|
|
|
mean value: 0.011863279342651366
|
|
|
|
key: test_mcc
|
|
value: [1. 0.73192505 0.57735027 0.66666667 1. 0.50709255
|
|
0.70710678 0.50709255 1. 0.57735027]
|
|
|
|
mean value: 0.7274584146618086
|
|
|
|
key: train_mcc
|
|
value: [0.90967354 0.90958351 0.89090909 0.89090909 0.9104463 0.89149871
|
|
0.94561086 0.92727273 0.92788641 0.9104463 ]
|
|
|
|
mean value: 0.9114236529454406
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.84615385 0.75 0.83333333 1. 0.75
|
|
0.83333333 0.75 1. 0.75 ]
|
|
|
|
mean value: 0.8512820512820513
|
|
|
|
key: train_accuracy
|
|
value: [0.95412844 0.95412844 0.94545455 0.94545455 0.95454545 0.94545455
|
|
0.97272727 0.96363636 0.96363636 0.95454545]
|
|
|
|
mean value: 0.955371142618849
|
|
|
|
key: test_fscore
|
|
value: [1. 0.85714286 0.66666667 0.83333333 1. 0.76923077
|
|
0.85714286 0.76923077 1. 0.8 ]
|
|
|
|
mean value: 0.8552747252747253
|
|
|
|
key: train_fscore
|
|
value: [0.95495495 0.95575221 0.94545455 0.94545455 0.95575221 0.94642857
|
|
0.97297297 0.96363636 0.96428571 0.95575221]
|
|
|
|
mean value: 0.956044430535581
|
|
|
|
key: test_precision
|
|
value: [1. 0.75 1. 0.83333333 1. 0.71428571
|
|
0.75 0.71428571 1. 0.66666667]
|
|
|
|
mean value: 0.8428571428571429
|
|
|
|
key: train_precision
|
|
value: [0.92982456 0.93103448 0.94545455 0.94545455 0.93103448 0.92982456
|
|
0.96428571 0.96363636 0.94736842 0.93103448]
|
|
|
|
mean value: 0.941895216096668
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.5 0.83333333 1. 0.83333333
|
|
1. 0.83333333 1. 1. ]
|
|
|
|
mean value: 0.9
|
|
|
|
key: train_recall
|
|
value: [0.98148148 0.98181818 0.94545455 0.94545455 0.98181818 0.96363636
|
|
0.98181818 0.96363636 0.98181818 0.98181818]
|
|
|
|
mean value: 0.9708754208754209
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.85714286 0.75 0.83333333 1. 0.75
|
|
0.83333333 0.75 1. 0.75 ]
|
|
|
|
mean value: 0.8523809523809525
|
|
|
|
key: train_roc_auc
|
|
value: [0.9543771 0.95387205 0.94545455 0.94545455 0.95454545 0.94545455
|
|
0.97272727 0.96363636 0.96363636 0.95454545]
|
|
|
|
mean value: 0.9553703703703703
|
|
|
|
key: test_jcc
|
|
value: [1. 0.75 0.5 0.71428571 1. 0.625
|
|
0.75 0.625 1. 0.66666667]
|
|
|
|
mean value: 0.763095238095238
|
|
|
|
key: train_jcc
|
|
value: [0.9137931 0.91525424 0.89655172 0.89655172 0.91525424 0.89830508
|
|
0.94736842 0.92982456 0.93103448 0.91525424]
|
|
|
|
mean value: 0.9159191813549068
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.81304193 0.6425631 0.7236855 0.77282071 0.68632269 0.64395881
|
|
0.83264065 1.10284615 1.17330837 0.61021018]
|
|
|
|
mean value: 0.8001398086547852
|
|
|
|
key: score_time
|
|
value: [0.02064586 0.01328111 0.0133636 0.01350069 0.01313186 0.01327395
|
|
0.01351237 0.01313639 0.01351643 0.01262403]
|
|
|
|
mean value: 0.013998627662658691
|
|
|
|
key: test_mcc
|
|
value: [1. 0.73192505 0.70710678 0.84515425 1. 0.70710678
|
|
0.70710678 0.70710678 0.70710678 0.70710678]
|
|
|
|
mean value: 0.7819719996559202
|
|
|
|
key: train_mcc
|
|
value: [0.946411 0.94509941 0.96427411 1. 0.92788641 0.96427411
|
|
1. 1. 1. 0.96427411]
|
|
|
|
mean value: 0.9712219155354007
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.84615385 0.83333333 0.91666667 1. 0.83333333
|
|
0.83333333 0.83333333 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8762820512820513
|
|
|
|
key: train_accuracy
|
|
value: [0.97247706 0.97247706 0.98181818 1. 0.96363636 0.98181818
|
|
1. 1. 1. 0.98181818]
|
|
|
|
mean value: 0.9854045037531276
|
|
|
|
key: test_fscore
|
|
value: [1. 0.85714286 0.8 0.92307692 1. 0.85714286
|
|
0.85714286 0.85714286 0.85714286 0.85714286]
|
|
|
|
mean value: 0.8865934065934066
|
|
|
|
key: train_fscore
|
|
value: [0.97297297 0.97297297 0.98214286 1. 0.96428571 0.98214286
|
|
1. 1. 1. 0.98214286]
|
|
|
|
mean value: 0.9856660231660231
|
|
|
|
key: test_precision
|
|
value: [1. 0.75 1. 0.85714286 1. 0.75
|
|
0.75 0.75 0.75 0.75 ]
|
|
|
|
mean value: 0.8357142857142857
|
|
|
|
key: train_precision
|
|
value: [0.94736842 0.96428571 0.96491228 1. 0.94736842 0.96491228
|
|
1. 1. 1. 0.96491228]
|
|
|
|
mean value: 0.975375939849624
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.66666667 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9666666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 0.98181818 1. 1. 0.98181818 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9963636363636363
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.85714286 0.83333333 0.91666667 1. 0.83333333
|
|
0.83333333 0.83333333 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8773809523809524
|
|
|
|
key: train_roc_auc
|
|
value: [0.97272727 0.97239057 0.98181818 1. 0.96363636 0.98181818
|
|
1. 1. 1. 0.98181818]
|
|
|
|
mean value: 0.9854208754208754
|
|
|
|
key: test_jcc
|
|
value: [1. 0.75 0.66666667 0.85714286 1. 0.75
|
|
0.75 0.75 0.75 0.75 ]
|
|
|
|
mean value: 0.8023809523809524
|
|
|
|
key: train_jcc
|
|
value: [0.94736842 0.94736842 0.96491228 1. 0.93103448 0.96491228
|
|
1. 1. 1. 0.96491228]
|
|
|
|
mean value: 0.9720508166969147
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01309919 0.00971413 0.00912786 0.00924087 0.00932741 0.0093441
|
|
0.01008654 0.01022816 0.01020265 0.00987601]
|
|
|
|
mean value: 0.010024690628051757
|
|
|
|
key: score_time
|
|
value: [0.01280189 0.00907016 0.00919104 0.00947571 0.00938964 0.0095365
|
|
0.00943804 0.00945926 0.00966907 0.0097394 ]
|
|
|
|
mean value: 0.009777069091796875
|
|
|
|
key: test_mcc
|
|
value: [ 0.03289758 0.28288947 0.66666667 -0.19245009 0.19245009 0.19245009
|
|
0.19245009 0.35355339 0. -0.30151134]
|
|
|
|
mean value: 0.14193959518204174
|
|
|
|
key: train_mcc
|
|
value: [0.46846888 0.41649508 0.67995868 0.36028835 0.39344474 0.42796049
|
|
0.41659779 0.55110775 0.44907312 0.38393633]
|
|
|
|
mean value: 0.4547331207160512
|
|
|
|
key: test_accuracy
|
|
value: [0.53846154 0.61538462 0.83333333 0.41666667 0.58333333 0.58333333
|
|
0.58333333 0.66666667 0.5 0.41666667]
|
|
|
|
mean value: 0.5737179487179487
|
|
|
|
key: train_accuracy
|
|
value: [0.69724771 0.68807339 0.83636364 0.66363636 0.68181818 0.68181818
|
|
0.69090909 0.76363636 0.7 0.67272727]
|
|
|
|
mean value: 0.7076230191826522
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.66666667 0.83333333 0.53333333 0.66666667 0.66666667
|
|
0.66666667 0.71428571 0.625 0.58823529]
|
|
|
|
mean value: 0.6627521008403362
|
|
|
|
key: train_fscore
|
|
value: [0.75912409 0.74626866 0.84745763 0.72180451 0.73282443 0.74820144
|
|
0.74242424 0.79365079 0.75555556 0.73134328]
|
|
|
|
mean value: 0.7578654624247017
|
|
|
|
key: test_precision
|
|
value: [0.54545455 0.55555556 0.83333333 0.44444444 0.55555556 0.55555556
|
|
0.55555556 0.625 0.5 0.45454545]
|
|
|
|
mean value: 0.5625
|
|
|
|
key: train_precision
|
|
value: [0.62650602 0.63291139 0.79365079 0.61538462 0.63157895 0.61904762
|
|
0.63636364 0.70422535 0.6375 0.62025316]
|
|
|
|
mean value: 0.6517421544986173
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.83333333 0.83333333 0.66666667 0.83333333 0.83333333
|
|
0.83333333 0.83333333 0.83333333 0.83333333]
|
|
|
|
mean value: 0.819047619047619
|
|
|
|
key: train_recall
|
|
value: [0.96296296 0.90909091 0.90909091 0.87272727 0.87272727 0.94545455
|
|
0.89090909 0.90909091 0.92727273 0.89090909]
|
|
|
|
mean value: 0.909023569023569
|
|
|
|
key: test_roc_auc
|
|
value: [0.51190476 0.63095238 0.83333333 0.41666667 0.58333333 0.58333333
|
|
0.58333333 0.66666667 0.5 0.41666667]
|
|
|
|
mean value: 0.5726190476190477
|
|
|
|
key: train_roc_auc
|
|
value: [0.6996633 0.68602694 0.83636364 0.66363636 0.68181818 0.68181818
|
|
0.69090909 0.76363636 0.7 0.67272727]
|
|
|
|
mean value: 0.7076599326599327
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.5 0.71428571 0.36363636 0.5 0.5
|
|
0.5 0.55555556 0.45454545 0.41666667]
|
|
|
|
mean value: 0.5004689754689755
|
|
|
|
key: train_jcc
|
|
value: [0.61176471 0.5952381 0.73529412 0.56470588 0.57831325 0.59770115
|
|
0.59036145 0.65789474 0.60714286 0.57647059]
|
|
|
|
mean value: 0.6114886831561173
|
|
|
|
MCC on Blind test: 0.31
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00948119 0.01198602 0.00978541 0.0090363 0.00904512 0.01059604
|
|
0.00988412 0.01315379 0.00978279 0.01021671]
|
|
|
|
mean value: 0.01029675006866455
|
|
|
|
key: score_time
|
|
value: [0.00958657 0.01180077 0.00924706 0.00959849 0.00926113 0.00877857
|
|
0.00982285 0.01357889 0.01013613 0.00970221]
|
|
|
|
mean value: 0.010151267051696777
|
|
|
|
key: test_mcc
|
|
value: [0.05143445 0.53674504 0.16903085 0.50709255 0.35355339 0.33333333
|
|
0.35355339 0.35355339 0.35355339 0.57735027]
|
|
|
|
mean value: 0.35892000587879247
|
|
|
|
key: train_mcc
|
|
value: [0.55982502 0.48669588 0.58268563 0.64320415 0.54772256 0.61910348
|
|
0.71097366 0.6401844 0.65726707 0.58268563]
|
|
|
|
mean value: 0.6030347502793679
|
|
|
|
key: test_accuracy
|
|
value: [0.53846154 0.76923077 0.58333333 0.75 0.66666667 0.66666667
|
|
0.66666667 0.66666667 0.66666667 0.75 ]
|
|
|
|
mean value: 0.6724358974358974
|
|
|
|
key: train_accuracy
|
|
value: [0.77981651 0.74311927 0.79090909 0.81818182 0.77272727 0.80909091
|
|
0.85454545 0.81818182 0.82727273 0.79090909]
|
|
|
|
mean value: 0.8004753961634695
|
|
|
|
key: test_fscore
|
|
value: [0.625 0.72727273 0.54545455 0.76923077 0.6 0.66666667
|
|
0.71428571 0.6 0.6 0.8 ]
|
|
|
|
mean value: 0.6647910422910422
|
|
|
|
key: train_fscore
|
|
value: [0.77358491 0.74074074 0.78504673 0.83050847 0.76190476 0.81415929
|
|
0.85964912 0.82758621 0.83478261 0.79646018]
|
|
|
|
mean value: 0.8024423019279884
|
|
|
|
key: test_precision
|
|
value: [0.55555556 0.8 0.6 0.71428571 0.75 0.66666667
|
|
0.625 0.75 0.75 0.66666667]
|
|
|
|
mean value: 0.6878174603174603
|
|
|
|
key: train_precision
|
|
value: [0.78846154 0.75471698 0.80769231 0.77777778 0.8 0.79310345
|
|
0.83050847 0.78688525 0.8 0.77586207]
|
|
|
|
mean value: 0.791500784278299
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.66666667 0.5 0.83333333 0.5 0.66666667
|
|
0.83333333 0.5 0.5 1. ]
|
|
|
|
mean value: 0.6714285714285715
|
|
|
|
key: train_recall
|
|
value: [0.75925926 0.72727273 0.76363636 0.89090909 0.72727273 0.83636364
|
|
0.89090909 0.87272727 0.87272727 0.81818182]
|
|
|
|
mean value: 0.8159259259259259
|
|
|
|
key: test_roc_auc
|
|
value: [0.52380952 0.76190476 0.58333333 0.75 0.66666667 0.66666667
|
|
0.66666667 0.66666667 0.66666667 0.75 ]
|
|
|
|
mean value: 0.6702380952380953
|
|
|
|
key: train_roc_auc
|
|
value: [0.77962963 0.74326599 0.79090909 0.81818182 0.77272727 0.80909091
|
|
0.85454545 0.81818182 0.82727273 0.79090909]
|
|
|
|
mean value: 0.8004713804713804
|
|
|
|
key: test_jcc
|
|
value: [0.45454545 0.57142857 0.375 0.625 0.42857143 0.5
|
|
0.55555556 0.42857143 0.42857143 0.66666667]
|
|
|
|
mean value: 0.5033910533910534
|
|
|
|
key: train_jcc
|
|
value: [0.63076923 0.58823529 0.64615385 0.71014493 0.61538462 0.68656716
|
|
0.75384615 0.70588235 0.71641791 0.66176471]
|
|
|
|
mean value: 0.671516620125812
|
|
|
|
MCC on Blind test: -0.02
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00977969 0.00957274 0.01054049 0.00951505 0.00953817 0.00853753
|
|
0.00867605 0.00890112 0.00862265 0.0086143 ]
|
|
|
|
mean value: 0.009229779243469238
|
|
|
|
key: score_time
|
|
value: [0.01461411 0.01574564 0.01428533 0.01525021 0.01500368 0.00962567
|
|
0.00955153 0.01138282 0.01003003 0.01652288]
|
|
|
|
mean value: 0.013201189041137696
|
|
|
|
key: test_mcc
|
|
value: [0.05143445 0.54761905 0.50709255 0.19245009 0.50709255 0.16903085
|
|
0.16903085 0.66666667 0.50709255 0.50709255]
|
|
|
|
mean value: 0.38246021672428
|
|
|
|
key: train_mcc
|
|
value: [0.67172877 0.63944827 0.61017022 0.58423739 0.6634888 0.65552134
|
|
0.65991202 0.65726707 0.67995868 0.61017022]
|
|
|
|
mean value: 0.643190277433704
|
|
|
|
key: test_accuracy
|
|
value: [0.53846154 0.76923077 0.75 0.58333333 0.75 0.58333333
|
|
0.58333333 0.83333333 0.75 0.75 ]
|
|
|
|
mean value: 0.6891025641025641
|
|
|
|
key: train_accuracy
|
|
value: [0.83486239 0.81651376 0.8 0.79090909 0.82727273 0.82727273
|
|
0.82727273 0.82727273 0.83636364 0.8 ]
|
|
|
|
mean value: 0.8187739783152628
|
|
|
|
key: test_fscore
|
|
value: [0.625 0.76923077 0.76923077 0.66666667 0.72727273 0.61538462
|
|
0.61538462 0.83333333 0.76923077 0.76923077]
|
|
|
|
mean value: 0.7159965034965035
|
|
|
|
key: train_fscore
|
|
value: [0.83928571 0.83050847 0.81666667 0.8 0.84033613 0.83185841
|
|
0.83760684 0.83478261 0.84745763 0.81666667]
|
|
|
|
mean value: 0.829516913714988
|
|
|
|
key: test_precision
|
|
value: [0.55555556 0.71428571 0.71428571 0.55555556 0.8 0.57142857
|
|
0.57142857 0.83333333 0.71428571 0.71428571]
|
|
|
|
mean value: 0.6744444444444444
|
|
|
|
key: train_precision
|
|
value: [0.81034483 0.77777778 0.75384615 0.76666667 0.78125 0.81034483
|
|
0.79032258 0.8 0.79365079 0.75384615]
|
|
|
|
mean value: 0.7838049781605121
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.83333333 0.83333333 0.83333333 0.66666667 0.66666667
|
|
0.66666667 0.83333333 0.83333333 0.83333333]
|
|
|
|
mean value: 0.7714285714285715
|
|
|
|
key: train_recall
|
|
value: [0.87037037 0.89090909 0.89090909 0.83636364 0.90909091 0.85454545
|
|
0.89090909 0.87272727 0.90909091 0.89090909]
|
|
|
|
mean value: 0.8815824915824916
|
|
|
|
key: test_roc_auc
|
|
value: [0.52380952 0.77380952 0.75 0.58333333 0.75 0.58333333
|
|
0.58333333 0.83333333 0.75 0.75 ]
|
|
|
|
mean value: 0.6880952380952382
|
|
|
|
key: train_roc_auc
|
|
value: [0.83518519 0.81582492 0.8 0.79090909 0.82727273 0.82727273
|
|
0.82727273 0.82727273 0.83636364 0.8 ]
|
|
|
|
mean value: 0.8187373737373738
|
|
|
|
key: test_jcc
|
|
value: [0.45454545 0.625 0.625 0.5 0.57142857 0.44444444
|
|
0.44444444 0.71428571 0.625 0.625 ]
|
|
|
|
mean value: 0.5629148629148629
|
|
|
|
key: train_jcc
|
|
value: [0.72307692 0.71014493 0.69014085 0.66666667 0.72463768 0.71212121
|
|
0.72058824 0.71641791 0.73529412 0.69014085]
|
|
|
|
mean value: 0.7089229364090237
|
|
|
|
MCC on Blind test: -0.02
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01164937 0.01075912 0.00952315 0.00953531 0.01028419 0.00965428
|
|
0.00968552 0.00964093 0.00956464 0.01076889]
|
|
|
|
mean value: 0.010106539726257325
|
|
|
|
key: score_time
|
|
value: [0.00977969 0.0090971 0.00901127 0.00892258 0.00886559 0.00884891
|
|
0.0091269 0.00902295 0.00891376 0.0089829 ]
|
|
|
|
mean value: 0.009057164192199707
|
|
|
|
key: test_mcc
|
|
value: [0.59160798 0.73192505 0.57735027 0. 0.66666667 0.50709255
|
|
0.50709255 0.50709255 0.84515425 0.57735027]
|
|
|
|
mean value: 0.5511332151307126
|
|
|
|
key: train_mcc
|
|
value: [0.87293607 0.90958351 0.7823356 0.87287156 0.81818182 0.87287156
|
|
0.85454545 0.89090909 0.8932746 0.87287156]
|
|
|
|
mean value: 0.8640380829678824
|
|
|
|
key: test_accuracy
|
|
value: [0.76923077 0.84615385 0.75 0.5 0.83333333 0.75
|
|
0.75 0.75 0.91666667 0.75 ]
|
|
|
|
mean value: 0.7615384615384615
|
|
|
|
key: train_accuracy
|
|
value: [0.93577982 0.95412844 0.89090909 0.93636364 0.90909091 0.93636364
|
|
0.92727273 0.94545455 0.94545455 0.93636364]
|
|
|
|
mean value: 0.9317180984153461
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.85714286 0.66666667 0.5 0.83333333 0.76923077
|
|
0.76923077 0.72727273 0.92307692 0.8 ]
|
|
|
|
mean value: 0.7669483457718751
|
|
|
|
key: train_fscore
|
|
value: [0.93693694 0.95575221 0.89285714 0.93693694 0.90909091 0.93693694
|
|
0.92727273 0.94545455 0.94736842 0.93577982]
|
|
|
|
mean value: 0.9324386585441908
|
|
|
|
key: test_precision
|
|
value: [0.7 0.75 1. 0.5 0.83333333 0.71428571
|
|
0.71428571 0.8 0.85714286 0.66666667]
|
|
|
|
mean value: 0.7535714285714286
|
|
|
|
key: train_precision
|
|
value: [0.9122807 0.93103448 0.87719298 0.92857143 0.90909091 0.92857143
|
|
0.92727273 0.94545455 0.91525424 0.94444444]
|
|
|
|
mean value: 0.9219167887662766
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.5 0.5 0.83333333 0.83333333
|
|
0.83333333 0.66666667 1. 1. ]
|
|
|
|
mean value: 0.8166666666666667
|
|
|
|
key: train_recall
|
|
value: [0.96296296 0.98181818 0.90909091 0.94545455 0.90909091 0.94545455
|
|
0.92727273 0.94545455 0.98181818 0.92727273]
|
|
|
|
mean value: 0.9435690235690235
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.85714286 0.75 0.5 0.83333333 0.75
|
|
0.75 0.75 0.91666667 0.75 ]
|
|
|
|
mean value: 0.7607142857142858
|
|
|
|
key: train_roc_auc
|
|
value: [0.93602694 0.95387205 0.89090909 0.93636364 0.90909091 0.93636364
|
|
0.92727273 0.94545455 0.94545455 0.93636364]
|
|
|
|
mean value: 0.9317171717171717
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.75 0.5 0.33333333 0.71428571 0.625
|
|
0.625 0.57142857 0.85714286 0.66666667]
|
|
|
|
mean value: 0.6342857142857142
|
|
|
|
key: train_jcc
|
|
value: [0.88135593 0.91525424 0.80645161 0.88135593 0.83333333 0.88135593
|
|
0.86440678 0.89655172 0.9 0.87931034]
|
|
|
|
mean value: 0.8739375828761399
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.48529983 0.68036675 0.6581347 0.54270411 0.85770082 0.76672292
|
|
0.43389964 0.44449854 0.54217029 0.63106346]
|
|
|
|
mean value: 0.6042561054229736
|
|
|
|
key: score_time
|
|
value: [0.0127604 0.0125227 0.01281142 0.01241446 0.01554918 0.01207995
|
|
0.01212215 0.01213694 0.01220894 0.01212549]
|
|
|
|
mean value: 0.012673163414001464
|
|
|
|
key: test_mcc
|
|
value: [0.85391256 0.73192505 0.70710678 0.84515425 1. 0.70710678
|
|
0.50709255 0.70710678 0.84515425 0.70710678]
|
|
|
|
mean value: 0.76116658055817
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92307692 0.84615385 0.83333333 0.91666667 1. 0.83333333
|
|
0.75 0.83333333 0.91666667 0.83333333]
|
|
|
|
mean value: 0.8685897435897436
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.85714286 0.8 0.92307692 1. 0.85714286
|
|
0.76923077 0.85714286 0.92307692 0.85714286]
|
|
|
|
mean value: 0.8777289377289377
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.875 0.75 1. 0.85714286 1. 0.75
|
|
0.71428571 0.75 0.85714286 0.75 ]
|
|
|
|
mean value: 0.8303571428571428
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.66666667 1. 1. 1.
|
|
0.83333333 1. 1. 1. ]
|
|
|
|
mean value: 0.95
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.91666667 0.85714286 0.83333333 0.91666667 1. 0.83333333
|
|
0.75 0.83333333 0.91666667 0.83333333]
|
|
|
|
mean value: 0.8690476190476191
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.875 0.75 0.66666667 0.85714286 1. 0.75
|
|
0.625 0.75 0.85714286 0.75 ]
|
|
|
|
mean value: 0.7880952380952381
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01438713 0.01367998 0.01034427 0.01018953 0.01007009 0.01011705
|
|
0.01004481 0.01011109 0.01047134 0.01029015]
|
|
|
|
mean value: 0.010970544815063477
|
|
|
|
key: score_time
|
|
value: [0.01147985 0.00901175 0.00891161 0.00851297 0.00833535 0.00835729
|
|
0.00853109 0.00848222 0.00848532 0.00848055]
|
|
|
|
mean value: 0.008858799934387207
|
|
|
|
key: test_mcc
|
|
value: [0.85391256 0.85714286 0.70710678 1. 1. 0.70710678
|
|
1. 0.70710678 0.84515425 0.84515425]
|
|
|
|
mean value: 0.8522684273989499
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92307692 0.92307692 0.83333333 1. 1. 0.83333333
|
|
1. 0.83333333 0.91666667 0.91666667]
|
|
|
|
mean value: 0.917948717948718
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.92307692 0.8 1. 1. 0.85714286
|
|
1. 0.85714286 0.92307692 0.92307692]
|
|
|
|
mean value: 0.9216849816849817
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.875 0.85714286 1. 1. 1. 0.75
|
|
1. 0.75 0.85714286 0.85714286]
|
|
|
|
mean value: 0.8946428571428571
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.66666667 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9666666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.91666667 0.92857143 0.83333333 1. 1. 0.83333333
|
|
1. 0.83333333 0.91666667 0.91666667]
|
|
|
|
mean value: 0.9178571428571429
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.875 0.85714286 0.66666667 1. 1. 0.75
|
|
1. 0.75 0.85714286 0.85714286]
|
|
|
|
mean value: 0.8613095238095237
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.74
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0830164 0.08358073 0.08367062 0.08567047 0.08646011 0.08686233
|
|
0.08595514 0.08737564 0.08717918 0.08413959]
|
|
|
|
mean value: 0.08539102077484131
|
|
|
|
key: score_time
|
|
value: [0.01694441 0.01717091 0.01712728 0.01713872 0.01881862 0.01864886
|
|
0.01748109 0.01802015 0.01842237 0.01695228]
|
|
|
|
mean value: 0.017672467231750488
|
|
|
|
key: test_mcc
|
|
value: [0.85391256 1. 0.84515425 0.84515425 0.84515425 0.84515425
|
|
0.50709255 0.66666667 0.84515425 0.50709255]
|
|
|
|
mean value: 0.7760535609813436
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92307692 1. 0.91666667 0.91666667 0.91666667 0.91666667
|
|
0.75 0.83333333 0.91666667 0.75 ]
|
|
|
|
mean value: 0.8839743589743589
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 1. 0.90909091 0.92307692 0.90909091 0.92307692
|
|
0.76923077 0.83333333 0.92307692 0.76923077]
|
|
|
|
mean value: 0.8892540792540792
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.875 1. 1. 0.85714286 1. 0.85714286
|
|
0.71428571 0.83333333 0.85714286 0.71428571]
|
|
|
|
mean value: 0.8708333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.83333333 1. 0.83333333 1.
|
|
0.83333333 0.83333333 1. 0.83333333]
|
|
|
|
mean value: 0.9166666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.91666667 1. 0.91666667 0.91666667 0.91666667 0.91666667
|
|
0.75 0.83333333 0.91666667 0.75 ]
|
|
|
|
mean value: 0.8833333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.875 1. 0.83333333 0.85714286 0.83333333 0.85714286
|
|
0.625 0.71428571 0.85714286 0.625 ]
|
|
|
|
mean value: 0.8077380952380953
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00878477 0.00989771 0.00971699 0.00872874 0.00962758 0.00880098
|
|
0.0087595 0.00977635 0.00923634 0.00971937]
|
|
|
|
mean value: 0.00930483341217041
|
|
|
|
key: score_time
|
|
value: [0.00859141 0.00919747 0.00928545 0.00860429 0.00931072 0.00917411
|
|
0.00899053 0.00922346 0.00866199 0.00929546]
|
|
|
|
mean value: 0.009033489227294921
|
|
|
|
key: test_mcc
|
|
value: [0.46056619 0.85714286 1. 0.16903085 0.70710678 0.70710678
|
|
0.19245009 0.84515425 0.84515425 0.70710678]
|
|
|
|
mean value: 0.649081883730695
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.69230769 0.92307692 1. 0.58333333 0.83333333 0.83333333
|
|
0.58333333 0.91666667 0.91666667 0.83333333]
|
|
|
|
mean value: 0.8115384615384615
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.77777778 0.92307692 1. 0.61538462 0.8 0.85714286
|
|
0.66666667 0.92307692 0.92307692 0.85714286]
|
|
|
|
mean value: 0.8343345543345543
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.63636364 0.85714286 1. 0.57142857 1. 0.75
|
|
0.55555556 0.85714286 0.85714286 0.75 ]
|
|
|
|
mean value: 0.7834776334776334
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.66666667 0.66666667 1.
|
|
0.83333333 1. 1. 1. ]
|
|
|
|
mean value: 0.9166666666666666
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.92857143 1. 0.58333333 0.83333333 0.83333333
|
|
0.58333333 0.91666667 0.91666667 0.83333333]
|
|
|
|
mean value: 0.8095238095238095
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.63636364 0.85714286 1. 0.44444444 0.66666667 0.75
|
|
0.5 0.85714286 0.85714286 0.75 ]
|
|
|
|
mean value: 0.7318903318903318
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.07
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.0785718 1.0573616 1.08498979 1.07473946 1.14857554 1.12651348
|
|
1.10159993 1.08371973 1.10169363 1.09919453]
|
|
|
|
mean value: 1.0956959486007691
|
|
|
|
key: score_time
|
|
value: [0.09313488 0.08697271 0.08894348 0.0912149 0.09502411 0.09491062
|
|
0.08662415 0.09425902 0.09359431 0.09493256]
|
|
|
|
mean value: 0.09196107387542725
|
|
|
|
key: test_mcc
|
|
value: [1. 1. 0.84515425 0.84515425 0.84515425 0.84515425
|
|
0.70710678 0.84515425 0.70710678 0.84515425]
|
|
|
|
mean value: 0.8485139090744195
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 1. 0.91666667 0.91666667 0.91666667 0.91666667
|
|
0.83333333 0.91666667 0.83333333 0.91666667]
|
|
|
|
mean value: 0.9166666666666666
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 1. 0.90909091 0.92307692 0.90909091 0.92307692
|
|
0.85714286 0.92307692 0.85714286 0.92307692]
|
|
|
|
mean value: 0.9224775224775225
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 0.85714286 1. 0.85714286
|
|
0.75 0.85714286 0.75 0.85714286]
|
|
|
|
mean value: 0.8928571428571428
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.83333333 1. 0.83333333 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9666666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 1. 0.91666667 0.91666667 0.91666667 0.91666667
|
|
0.83333333 0.91666667 0.83333333 0.91666667]
|
|
|
|
mean value: 0.9166666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
test_jcc
|
|
value: [1. 1. 0.83333333 0.85714286 0.83333333 0.85714286
|
|
0.75 0.85714286 0.75 0.85714286]
|
|
|
|
mean value: 0.8595238095238095
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.89127827 0.85924602 0.83854628 0.89531541 0.85359836 0.89145994
|
|
0.85005498 0.88606501 0.96694565 0.92019033]
|
|
|
|
mean value: 0.8852700233459473
|
|
|
|
key: score_time
|
|
value: [0.20348477 0.22191596 0.16322684 0.18678927 0.23362851 0.23721766
|
|
0.1858592 0.18642545 0.1423831 0.18166327]
|
|
|
|
mean value: 0.1942594051361084
|
|
|
|
key: test_mcc
|
|
value: [1. 0.85714286 0.84515425 0.70710678 0.84515425 1.
|
|
0.57735027 0.84515425 0.70710678 0.84515425]
|
|
|
|
mean value: 0.8229323707619645
|
|
|
|
key: train_mcc
|
|
value: [1. 0.98181818 1. 1. 1. 0.96363636
|
|
1. 0.98198051 0.98198051 1. ]
|
|
|
|
mean value: 0.9909415557578477
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.92307692 0.91666667 0.83333333 0.91666667 1.
|
|
0.75 0.91666667 0.83333333 0.91666667]
|
|
|
|
mean value: 0.9006410256410257
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.99082569 1. 1. 1. 0.98181818
|
|
1. 0.99090909 0.99090909 1. ]
|
|
|
|
mean value: 0.9954462051709758
|
|
|
|
key: test_fscore
|
|
value: [1. 0.92307692 0.90909091 0.85714286 0.90909091 1.
|
|
0.8 0.92307692 0.85714286 0.92307692]
|
|
|
|
mean value: 0.9101698301698301
|
|
|
|
key: train_fscore
|
|
value: [1. 0.99082569 1. 1. 1. 0.98181818
|
|
1. 0.99082569 0.99099099 1. ]
|
|
|
|
mean value: 0.9954460548955961
|
|
|
|
key: test_precision
|
|
value: [1. 0.85714286 1. 0.75 1. 1.
|
|
0.66666667 0.85714286 0.75 0.85714286]
|
|
|
|
mean value: 0.8738095238095238
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 0.98181818
|
|
1. 1. 0.98214286 1. ]
|
|
|
|
mean value: 0.9963961038961039
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.83333333 1. 0.83333333 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9666666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 0.98181818 1. 1. 1. 0.98181818
|
|
1. 0.98181818 1. 1. ]
|
|
|
|
mean value: 0.9945454545454545
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.92857143 0.91666667 0.83333333 0.91666667 1.
|
|
0.75 0.91666667 0.83333333 0.91666667]
|
|
|
|
mean value: 0.9011904761904762
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.99090909 1. 1. 1. 0.98181818
|
|
1. 0.99090909 0.99090909 1. ]
|
|
|
|
mean value: 0.9954545454545455
|
|
|
|
key: test_jcc
|
|
value: [1. 0.85714286 0.83333333 0.75 0.83333333 1.
|
|
0.66666667 0.85714286 0.75 0.85714286]
|
|
|
|
mean value: 0.8404761904761905
|
|
|
|
key: train_jcc
|
|
value: [1. 0.98181818 1. 1. 1. 0.96428571
|
|
1. 0.98181818 0.98214286 1. ]
|
|
|
|
mean value: 0.9910064935064935
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01271582 0.00877857 0.00878167 0.00886893 0.00946069 0.00971866
|
|
0.00891328 0.01002097 0.00903702 0.00889587]
|
|
|
|
mean value: 0.009519147872924804
|
|
|
|
key: score_time
|
|
value: [0.01380134 0.00883746 0.00854397 0.00910234 0.00898838 0.00944161
|
|
0.00923443 0.00930643 0.00865269 0.00872183]
|
|
|
|
mean value: 0.009463047981262207
|
|
|
|
key: test_mcc
|
|
value: [0.05143445 0.53674504 0.16903085 0.50709255 0.35355339 0.33333333
|
|
0.35355339 0.35355339 0.35355339 0.57735027]
|
|
|
|
mean value: 0.35892000587879247
|
|
|
|
key: train_mcc
|
|
value: [0.55982502 0.48669588 0.58268563 0.64320415 0.54772256 0.61910348
|
|
0.71097366 0.6401844 0.65726707 0.58268563]
|
|
|
|
mean value: 0.6030347502793679
|
|
|
|
key: test_accuracy
|
|
value: [0.53846154 0.76923077 0.58333333 0.75 0.66666667 0.66666667
|
|
0.66666667 0.66666667 0.66666667 0.75 ]
|
|
|
|
mean value: 0.6724358974358974
|
|
|
|
key: train_accuracy
|
|
value: [0.77981651 0.74311927 0.79090909 0.81818182 0.77272727 0.80909091
|
|
0.85454545 0.81818182 0.82727273 0.79090909]
|
|
|
|
mean value: 0.8004753961634695
|
|
|
|
key: test_fscore
|
|
value: [0.625 0.72727273 0.54545455 0.76923077 0.6 0.66666667
|
|
0.71428571 0.6 0.6 0.8 ]
|
|
|
|
mean value: 0.6647910422910422
|
|
|
|
key: train_fscore
|
|
value: [0.77358491 0.74074074 0.78504673 0.83050847 0.76190476 0.81415929
|
|
0.85964912 0.82758621 0.83478261 0.79646018]
|
|
|
|
mean value: 0.8024423019279884
|
|
|
|
key: test_precision
|
|
value: [0.55555556 0.8 0.6 0.71428571 0.75 0.66666667
|
|
0.625 0.75 0.75 0.66666667]
|
|
|
|
mean value: 0.6878174603174603
|
|
|
|
key: train_precision
|
|
value: [0.78846154 0.75471698 0.80769231 0.77777778 0.8 0.79310345
|
|
0.83050847 0.78688525 0.8 0.77586207]
|
|
|
|
mean value: 0.791500784278299
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.66666667 0.5 0.83333333 0.5 0.66666667
|
|
0.83333333 0.5 0.5 1. ]
|
|
|
|
mean value: 0.6714285714285715
|
|
|
|
key: train_recall
|
|
value: [0.75925926 0.72727273 0.76363636 0.89090909 0.72727273 0.83636364
|
|
0.89090909 0.87272727 0.87272727 0.81818182]
|
|
|
|
mean value: 0.8159259259259259
|
|
|
|
key: test_roc_auc
|
|
value: [0.52380952 0.76190476 0.58333333 0.75 0.66666667 0.66666667
|
|
0.66666667 0.66666667 0.66666667 0.75 ]
|
|
|
|
mean value: 0.6702380952380953
|
|
|
|
key: train_roc_auc
|
|
value: [0.77962963 0.74326599 0.79090909 0.81818182 0.77272727 0.80909091
|
|
0.85454545 0.81818182 0.82727273 0.79090909]
|
|
|
|
mean value: 0.8004713804713804
|
|
|
|
key: test_jcc
|
|
value: [0.45454545 0.57142857 0.375 0.625 0.42857143 0.5
|
|
0.55555556 0.42857143 0.42857143 0.66666667]
|
|
|
|
mean value: 0.5033910533910534
|
|
|
|
key: train_jcc
|
|
value: [0.63076923 0.58823529 0.64615385 0.71014493 0.61538462 0.68656716
|
|
0.75384615 0.70588235 0.71641791 0.66176471]
|
|
|
|
mean value: 0.671516620125812
|
|
|
|
MCC on Blind test: -0.02
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.25603175 0.0416081 0.09237313 0.03630376 0.03942323 0.03782082
|
|
0.03896832 0.04102874 0.14622521 0.10856009]
|
|
|
|
mean value: 0.08383431434631347
|
|
|
|
key: score_time
|
|
value: [0.0129261 0.01272106 0.01062417 0.01029301 0.01023841 0.01021624
|
|
0.01023221 0.01201367 0.01219821 0.01212096]
|
|
|
|
mean value: 0.011358404159545898
|
|
|
|
key: test_mcc
|
|
value: [1. 0.85714286 0.70710678 1. 1. 0.84515425
|
|
0.84515425 1. 0.84515425 0.84515425]
|
|
|
|
mean value: 0.8944866657243471
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.92307692 0.83333333 1. 1. 0.91666667
|
|
0.91666667 1. 0.91666667 0.91666667]
|
|
|
|
mean value: 0.9423076923076923
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.92307692 0.8 1. 1. 0.92307692
|
|
0.92307692 1. 0.92307692 0.92307692]
|
|
|
|
mean value: 0.9415384615384615
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.85714286 1. 1. 1. 0.85714286
|
|
0.85714286 1. 0.85714286 0.85714286]
|
|
|
|
mean value: 0.9285714285714286
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.66666667 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9666666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.92857143 0.83333333 1. 1. 0.91666667
|
|
0.91666667 1. 0.91666667 0.91666667]
|
|
|
|
mean value: 0.9428571428571428
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.85714286 0.66666667 1. 1. 0.85714286
|
|
0.85714286 1. 0.85714286 0.85714286]
|
|
|
|
mean value: 0.8952380952380952
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 1.0
|
|
|
|
Accuracy on Blind test: 1.0
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02425313 0.04587579 0.04571676 0.04559064 0.0204463 0.04565573
|
|
0.0427928 0.04377961 0.05601478 0.04367304]
|
|
|
|
mean value: 0.04137985706329346
|
|
|
|
key: score_time
|
|
value: [0.02400351 0.01777744 0.02207375 0.02313566 0.01182032 0.02527142
|
|
0.02217817 0.02055264 0.02132297 0.01375055]
|
|
|
|
mean value: 0.02018864154815674
|
|
|
|
key: test_mcc
|
|
value: [1. 0.38095238 0.84515425 0.84515425 0.50709255 0.70710678
|
|
0.50709255 0.70710678 0.50709255 0.57735027]
|
|
|
|
mean value: 0.6584102380483465
|
|
|
|
key: train_mcc
|
|
value: [0.98181818 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9981818181818182
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.69230769 0.91666667 0.91666667 0.75 0.83333333
|
|
0.75 0.83333333 0.75 0.75 ]
|
|
|
|
mean value: 0.8192307692307692
|
|
|
|
key: train_accuracy
|
|
value: [0.99082569 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9990825688073395
|
|
|
|
key: test_fscore
|
|
value: [1. 0.66666667 0.90909091 0.92307692 0.76923077 0.85714286
|
|
0.76923077 0.85714286 0.76923077 0.8 ]
|
|
|
|
mean value: 0.832081252081252
|
|
|
|
key: train_fscore
|
|
value: [0.99082569 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9990825688073395
|
|
|
|
key: test_precision
|
|
value: [1. 0.66666667 1. 0.85714286 0.71428571 0.75
|
|
0.71428571 0.75 0.71428571 0.66666667]
|
|
|
|
mean value: 0.7833333333333333
|
|
|
|
key: train_precision
|
|
value: [0.98181818 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9981818181818182
|
|
|
|
key: test_recall
|
|
value: [1. 0.66666667 0.83333333 1. 0.83333333 1.
|
|
0.83333333 1. 0.83333333 1. ]
|
|
|
|
mean value: 0.9
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.69047619 0.91666667 0.91666667 0.75 0.83333333
|
|
0.75 0.83333333 0.75 0.75 ]
|
|
|
|
mean value: 0.8190476190476191
|
|
|
|
key: train_roc_auc
|
|
value: [0.99090909 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.999090909090909
|
|
|
|
key: test_jcc
|
|
value: [1. 0.5 0.83333333 0.85714286 0.625 0.75
|
|
0.625 0.75 0.625 0.66666667]
|
|
|
|
mean value: 0.7232142857142857
|
|
|
|
key: train_jcc
|
|
value: [0.98181818 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9981818181818182
|
|
|
|
MCC on Blind test: 0.18
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02053356 0.00876498 0.00939441 0.00835562 0.00835085 0.00902057
|
|
0.00838923 0.00857806 0.00837302 0.00829887]
|
|
|
|
mean value: 0.009805917739868164
|
|
|
|
key: score_time
|
|
value: [0.00957847 0.00857019 0.00847888 0.00822067 0.00830531 0.00838208
|
|
0.00837874 0.00827432 0.00823951 0.00822854]
|
|
|
|
mean value: 0.00846567153930664
|
|
|
|
key: test_mcc
|
|
value: [-0.09759001 0.07142857 0. 0.50709255 0.50709255 0.50709255
|
|
0. 0. 0. 0.16903085]
|
|
|
|
mean value: 0.16641470735907513
|
|
|
|
key: train_mcc
|
|
value: [0.63299663 0.38363515 0.56513291 0.41818182 0.36369648 0.47343208
|
|
0.36514837 0.54626778 0.38283197 0.36661779]
|
|
|
|
mean value: 0.4497940981915372
|
|
|
|
key: test_accuracy
|
|
value: [0.46153846 0.53846154 0.5 0.75 0.75 0.75
|
|
0.5 0.5 0.5 0.58333333]
|
|
|
|
mean value: 0.5833333333333334
|
|
|
|
key: train_accuracy
|
|
value: [0.81651376 0.68807339 0.78181818 0.70909091 0.68181818 0.73636364
|
|
0.68181818 0.77272727 0.69090909 0.68181818]
|
|
|
|
mean value: 0.7240950792326939
|
|
|
|
key: test_fscore
|
|
value: [0.53333333 0.5 0.4 0.72727273 0.76923077 0.72727273
|
|
0.57142857 0.4 0.5 0.61538462]
|
|
|
|
mean value: 0.5743922743922744
|
|
|
|
key: train_fscore
|
|
value: [0.81481481 0.66 0.77358491 0.70909091 0.67889908 0.72897196
|
|
0.66666667 0.76635514 0.67924528 0.66019417]
|
|
|
|
mean value: 0.7137822939381463
|
|
|
|
key: test_precision
|
|
value: [0.5 0.5 0.5 0.8 0.71428571 0.8
|
|
0.5 0.5 0.5 0.57142857]
|
|
|
|
mean value: 0.5885714285714285
|
|
|
|
key: train_precision
|
|
value: [0.81481481 0.73333333 0.80392157 0.70909091 0.68518519 0.75
|
|
0.7 0.78846154 0.70588235 0.70833333]
|
|
|
|
mean value: 0.7399023035787742
|
|
|
|
key: test_recall
|
|
value: [0.57142857 0.5 0.33333333 0.66666667 0.83333333 0.66666667
|
|
0.66666667 0.33333333 0.5 0.66666667]
|
|
|
|
mean value: 0.5738095238095238
|
|
|
|
key: train_recall
|
|
value: [0.81481481 0.6 0.74545455 0.70909091 0.67272727 0.70909091
|
|
0.63636364 0.74545455 0.65454545 0.61818182]
|
|
|
|
mean value: 0.6905723905723906
|
|
|
|
key: test_roc_auc
|
|
value: [0.45238095 0.53571429 0.5 0.75 0.75 0.75
|
|
0.5 0.5 0.5 0.58333333]
|
|
|
|
mean value: 0.5821428571428572
|
|
|
|
key: train_roc_auc
|
|
value: [0.81649832 0.68888889 0.78181818 0.70909091 0.68181818 0.73636364
|
|
0.68181818 0.77272727 0.69090909 0.68181818]
|
|
|
|
mean value: 0.7241750841750842
|
|
|
|
key: test_jcc
|
|
value: [0.36363636 0.33333333 0.25 0.57142857 0.625 0.57142857
|
|
0.4 0.25 0.33333333 0.44444444]
|
|
|
|
mean value: 0.4142604617604618
|
|
|
|
key: train_jcc
|
|
value: [0.6875 0.49253731 0.63076923 0.54929577 0.51388889 0.57352941
|
|
0.5 0.62121212 0.51428571 0.49275362]
|
|
|
|
mean value: 0.557577207818979
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01074457 0.01317334 0.01480269 0.01546049 0.01378179 0.01408696
|
|
0.01493144 0.01416516 0.01332617 0.01422429]
|
|
|
|
mean value: 0.013869690895080566
|
|
|
|
key: score_time
|
|
value: [0.00849676 0.01131868 0.01156116 0.01131964 0.01126122 0.01135564
|
|
0.01128125 0.01134062 0.01136756 0.01137233]
|
|
|
|
mean value: 0.011067485809326172
|
|
|
|
key: test_mcc
|
|
value: [0.6172134 0.54761905 0.70710678 0.84515425 1. 0.70710678
|
|
0.70710678 0.70710678 0.30151134 0.70710678]
|
|
|
|
mean value: 0.6847031952706434
|
|
|
|
key: train_mcc
|
|
value: [0.6650199 0.85382288 0.94686415 0.96427411 0.94561086 0.94686415
|
|
1. 0.83205029 0.87988269 0.94561086]
|
|
|
|
mean value: 0.8979999897210901
|
|
|
|
key: test_accuracy
|
|
value: [0.76923077 0.76923077 0.83333333 0.91666667 1. 0.83333333
|
|
0.83333333 0.83333333 0.58333333 0.83333333]
|
|
|
|
mean value: 0.8205128205128205
|
|
|
|
key: train_accuracy
|
|
value: [0.80733945 0.9266055 0.97272727 0.98181818 0.97272727 0.97272727
|
|
1. 0.90909091 0.93636364 0.97272727]
|
|
|
|
mean value: 0.9452126772310259
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.76923077 0.8 0.92307692 1. 0.85714286
|
|
0.85714286 0.85714286 0.70588235 0.85714286]
|
|
|
|
mean value: 0.8354034201093025
|
|
|
|
key: train_fscore
|
|
value: [0.75862069 0.92592593 0.97345133 0.98214286 0.97297297 0.97345133
|
|
1. 0.91666667 0.94017094 0.97297297]
|
|
|
|
mean value: 0.9416375680374764
|
|
|
|
key: test_precision
|
|
value: [1. 0.71428571 1. 0.85714286 1. 0.75
|
|
0.75 0.75 0.54545455 0.75 ]
|
|
|
|
mean value: 0.8116883116883117
|
|
|
|
key: train_precision
|
|
value: [1. 0.94339623 0.94827586 0.96491228 0.96428571 0.94827586
|
|
1. 0.84615385 0.88709677 0.96428571]
|
|
|
|
mean value: 0.9466682280173603
|
|
|
|
key: test_recall
|
|
value: [0.57142857 0.83333333 0.66666667 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9071428571428571
|
|
|
|
key: train_recall
|
|
value: [0.61111111 0.90909091 1. 1. 0.98181818 1.
|
|
1. 1. 1. 0.98181818]
|
|
|
|
mean value: 0.9483838383838383
|
|
|
|
key: test_roc_auc
|
|
value: [0.78571429 0.77380952 0.83333333 0.91666667 1. 0.83333333
|
|
0.83333333 0.83333333 0.58333333 0.83333333]
|
|
|
|
mean value: 0.8226190476190476
|
|
|
|
key: train_roc_auc
|
|
value: [0.80555556 0.92676768 0.97272727 0.98181818 0.97272727 0.97272727
|
|
1. 0.90909091 0.93636364 0.97272727]
|
|
|
|
mean value: 0.945050505050505
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.625 0.66666667 0.85714286 1. 0.75
|
|
0.75 0.75 0.54545455 0.75 ]
|
|
|
|
mean value: 0.726569264069264
|
|
|
|
key: train_jcc
|
|
value: [0.61111111 0.86206897 0.94827586 0.96491228 0.94736842 0.94827586
|
|
1. 0.84615385 0.88709677 0.94736842]
|
|
|
|
mean value: 0.8962631543920696
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01313758 0.0132854 0.01294041 0.0129962 0.01362205 0.01266551
|
|
0.01328731 0.01308322 0.01234412 0.01280737]
|
|
|
|
mean value: 0.013016915321350098
|
|
|
|
key: score_time
|
|
value: [0.01067615 0.01131654 0.01146936 0.01136732 0.01141739 0.01198816
|
|
0.01223373 0.01159525 0.01143169 0.01143622]
|
|
|
|
mean value: 0.011493182182312012
|
|
|
|
key: test_mcc
|
|
value: [0.85391256 0.26726124 0.35355339 0.70710678 1. 0.50709255
|
|
0.50709255 0.57735027 0.84515425 0.57735027]
|
|
|
|
mean value: 0.61958738763042
|
|
|
|
key: train_mcc
|
|
value: [0.96396098 0.73832417 0.87287156 0.66885605 0.92788641 0.87635609
|
|
0.72648316 0.74114491 0.78651226 0.96427411]
|
|
|
|
mean value: 0.8266669713945304
|
|
|
|
key: test_accuracy
|
|
value: [0.92307692 0.53846154 0.66666667 0.83333333 1. 0.75
|
|
0.75 0.75 0.91666667 0.75 ]
|
|
|
|
mean value: 0.7878205128205128
|
|
|
|
key: train_accuracy
|
|
value: [0.98165138 0.85321101 0.93636364 0.80909091 0.96363636 0.93636364
|
|
0.84545455 0.85454545 0.89090909 0.98181818]
|
|
|
|
mean value: 0.9053044203502919
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.66666667 0.6 0.8 1. 0.76923077
|
|
0.72727273 0.8 0.90909091 0.8 ]
|
|
|
|
mean value: 0.8005594405594405
|
|
|
|
key: train_fscore
|
|
value: [0.98181818 0.87301587 0.93693694 0.76404494 0.96296296 0.93913043
|
|
0.8172043 0.87301587 0.88461538 0.98214286]
|
|
|
|
mean value: 0.9014887749186171
|
|
|
|
key: test_precision
|
|
value: [0.875 0.5 0.75 1. 1. 0.71428571
|
|
0.8 0.66666667 1. 0.66666667]
|
|
|
|
mean value: 0.7972619047619047
|
|
|
|
key: train_precision
|
|
value: [0.96428571 0.77464789 0.92857143 1. 0.98113208 0.9
|
|
1. 0.77464789 0.93877551 0.96491228]
|
|
|
|
mean value: 0.9226972783882564
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.5 0.66666667 1. 0.83333333
|
|
0.66666667 1. 0.83333333 1. ]
|
|
|
|
mean value: 0.85
|
|
|
|
key: train_recall
|
|
value: [1. 1. 0.94545455 0.61818182 0.94545455 0.98181818
|
|
0.69090909 1. 0.83636364 1. ]
|
|
|
|
mean value: 0.9018181818181819
|
|
|
|
key: test_roc_auc
|
|
value: [0.91666667 0.57142857 0.66666667 0.83333333 1. 0.75
|
|
0.75 0.75 0.91666667 0.75 ]
|
|
|
|
mean value: 0.7904761904761906
|
|
|
|
key: train_roc_auc
|
|
value: [0.98181818 0.85185185 0.93636364 0.80909091 0.96363636 0.93636364
|
|
0.84545455 0.85454545 0.89090909 0.98181818]
|
|
|
|
mean value: 0.9051851851851852
|
|
|
|
key: test_jcc
|
|
value: [0.875 0.5 0.42857143 0.66666667 1. 0.625
|
|
0.57142857 0.66666667 0.83333333 0.66666667]
|
|
|
|
mean value: 0.6833333333333333
|
|
|
|
key: train_jcc
|
|
value: [0.96428571 0.77464789 0.88135593 0.61818182 0.92857143 0.8852459
|
|
0.69090909 0.77464789 0.79310345 0.96491228]
|
|
|
|
mean value: 0.827586138941629
|
|
|
|
MCC on Blind test: 0.24
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09534383 0.085567 0.08703375 0.08614278 0.08468103 0.08674598
|
|
0.08505964 0.08544636 0.08634448 0.08527946]
|
|
|
|
mean value: 0.08676443099975586
|
|
|
|
key: score_time
|
|
value: [0.01453614 0.01494813 0.01455402 0.01467037 0.01481223 0.01462936
|
|
0.0144453 0.01477313 0.01445794 0.01468873]
|
|
|
|
mean value: 0.01465153694152832
|
|
|
|
key: test_mcc
|
|
value: [1. 0.85714286 0.70710678 0.84515425 1. 0.84515425
|
|
0.70710678 0.70710678 0.57735027 0.84515425]
|
|
|
|
mean value: 0.8091276234077676
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.92307692 0.83333333 0.91666667 1. 0.91666667
|
|
0.83333333 0.83333333 0.75 0.91666667]
|
|
|
|
mean value: 0.8923076923076924
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.92307692 0.8 0.92307692 1. 0.92307692
|
|
0.85714286 0.85714286 0.8 0.92307692]
|
|
|
|
mean value: 0.9006593406593406
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.85714286 1. 0.85714286 1. 0.85714286
|
|
0.75 0.75 0.66666667 0.85714286]
|
|
|
|
mean value: 0.8595238095238095
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.66666667 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9666666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.92857143 0.83333333 0.91666667 1. 0.91666667
|
|
0.83333333 0.83333333 0.75 0.91666667]
|
|
|
|
mean value: 0.8928571428571429
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.85714286 0.66666667 0.85714286 1. 0.85714286
|
|
0.75 0.75 0.66666667 0.85714286]
|
|
|
|
mean value: 0.8261904761904761
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02889991 0.0324192 0.03340173 0.0260253 0.03167248 0.03543282
|
|
0.03508973 0.02882719 0.03297305 0.02837276]
|
|
|
|
mean value: 0.031311416625976564
|
|
|
|
key: score_time
|
|
value: [0.01865363 0.02269077 0.02308249 0.02240968 0.03019142 0.02404714
|
|
0.01934457 0.01710725 0.01638937 0.01652551]
|
|
|
|
mean value: 0.021044182777404784
|
|
|
|
key: test_mcc
|
|
value: [1. 0.85714286 0.70710678 1. 1. 1.
|
|
1. 0.70710678 0.84515425 0.84515425]
|
|
|
|
mean value: 0.8961664928972985
|
|
|
|
key: train_mcc
|
|
value: [1. 0.98181818 1. 0.98198051 0.98198051 1.
|
|
0.98198051 1. 0.98198051 1. ]
|
|
|
|
mean value: 0.9909740206066044
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.92307692 0.83333333 1. 1. 1.
|
|
1. 0.83333333 0.91666667 0.91666667]
|
|
|
|
mean value: 0.9423076923076923
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.99082569 1. 0.99090909 0.99090909 1.
|
|
0.99090909 1. 0.99090909 1. ]
|
|
|
|
mean value: 0.9954462051709758
|
|
|
|
key: test_fscore
|
|
value: [1. 0.92307692 0.8 1. 1. 1.
|
|
1. 0.85714286 0.92307692 0.92307692]
|
|
|
|
mean value: 0.9426373626373626
|
|
|
|
key: train_fscore
|
|
value: [1. 0.99082569 1. 0.99082569 0.99082569 1.
|
|
0.99082569 1. 0.99082569 1. ]
|
|
|
|
mean value: 0.9954128440366972
|
|
|
|
key: test_precision
|
|
value: [1. 0.85714286 1. 1. 1. 1.
|
|
1. 0.75 0.85714286 0.85714286]
|
|
|
|
mean value: 0.9321428571428572
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.66666667 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9666666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 0.98181818 1. 0.98181818 0.98181818 1.
|
|
0.98181818 1. 0.98181818 1. ]
|
|
|
|
mean value: 0.990909090909091
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.92857143 0.83333333 1. 1. 1.
|
|
1. 0.83333333 0.91666667 0.91666667]
|
|
|
|
mean value: 0.9428571428571428
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.99090909 1. 0.99090909 0.99090909 1.
|
|
0.99090909 1. 0.99090909 1. ]
|
|
|
|
mean value: 0.9954545454545455
|
|
|
|
key: test_jcc
|
|
value: [1. 0.85714286 0.66666667 1. 1. 1.
|
|
1. 0.75 0.85714286 0.85714286]
|
|
|
|
mean value: 0.8988095238095238
|
|
|
|
key: train_jcc
|
|
value: [1. 0.98181818 1. 0.98181818 0.98181818 1.
|
|
0.98181818 1. 0.98181818 1. ]
|
|
|
|
mean value: 0.990909090909091
|
|
|
|
MCC on Blind test: 1.0
|
|
|
|
Accuracy on Blind test: 1.0
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03324318 0.04361248 0.04901958 0.05543017 0.04057789 0.06096983
|
|
0.06238627 0.0471065 0.03929687 0.04267597]
|
|
|
|
mean value: 0.04743187427520752
|
|
|
|
key: score_time
|
|
value: [0.01612687 0.02627373 0.03370476 0.015728 0.02386355 0.03294277
|
|
0.02333474 0.02058983 0.01648188 0.02381444]
|
|
|
|
mean value: 0.023286056518554688
|
|
|
|
key: test_mcc
|
|
value: [0.59160798 0.6172134 0.84515425 0.50709255 0.66666667 0.4472136
|
|
0.84515425 0.66666667 0.84515425 0.50709255]
|
|
|
|
mean value: 0.653901617685139
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.76923077 0.76923077 0.91666667 0.75 0.83333333 0.66666667
|
|
0.91666667 0.83333333 0.91666667 0.75 ]
|
|
|
|
mean value: 0.8121794871794872
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.8 0.90909091 0.72727273 0.83333333 0.75
|
|
0.92307692 0.83333333 0.92307692 0.76923077]
|
|
|
|
mean value: 0.8291944330179624
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.7 0.66666667 1. 0.8 0.83333333 0.6
|
|
0.85714286 0.83333333 0.85714286 0.71428571]
|
|
|
|
mean value: 0.7861904761904762
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.83333333 0.66666667 0.83333333 1.
|
|
1. 0.83333333 1. 0.83333333]
|
|
|
|
mean value: 0.9
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.78571429 0.91666667 0.75 0.83333333 0.66666667
|
|
0.91666667 0.83333333 0.91666667 0.75 ]
|
|
|
|
mean value: 0.8119047619047619
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.66666667 0.83333333 0.57142857 0.71428571 0.6
|
|
0.85714286 0.71428571 0.85714286 0.625 ]
|
|
|
|
mean value: 0.7139285714285715
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.23154759 0.17467642 0.16903472 0.200176 0.19823265 0.20229673
|
|
0.2020793 0.19040227 0.22231889 0.19493937]
|
|
|
|
mean value: 0.1985703945159912
|
|
|
|
key: score_time
|
|
value: [0.00958276 0.00997043 0.00927091 0.00918484 0.01013827 0.00904226
|
|
0.00913548 0.00904107 0.00891614 0.00885844]
|
|
|
|
mean value: 0.00931406021118164
|
|
|
|
key: test_mcc
|
|
value: [0.85391256 0.85714286 0.70710678 1. 1. 0.84515425
|
|
1. 0.70710678 0.84515425 0.84515425]
|
|
|
|
mean value: 0.8660731747531468
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92307692 0.92307692 0.83333333 1. 1. 0.91666667
|
|
1. 0.83333333 0.91666667 0.91666667]
|
|
|
|
mean value: 0.9262820512820513
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.92307692 0.8 1. 1. 0.92307692
|
|
1. 0.85714286 0.92307692 0.92307692]
|
|
|
|
mean value: 0.9282783882783883
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.875 0.85714286 1. 1. 1. 0.85714286
|
|
1. 0.75 0.85714286 0.85714286]
|
|
|
|
mean value: 0.9053571428571429
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.66666667 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9666666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.91666667 0.92857143 0.83333333 1. 1. 0.91666667
|
|
1. 0.83333333 0.91666667 0.91666667]
|
|
|
|
mean value: 0.9261904761904762
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.875 0.85714286 0.66666667 1. 1. 0.85714286
|
|
1. 0.75 0.85714286 0.85714286]
|
|
|
|
mean value: 0.8720238095238095
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01379657 0.01502037 0.01550913 0.01523709 0.01895094 0.01518345
|
|
0.01550031 0.01520324 0.01554441 0.0154326 ]
|
|
|
|
mean value: 0.01553781032562256
|
|
|
|
key: score_time
|
|
value: [0.01187181 0.01197243 0.01188207 0.01172543 0.01199055 0.01290751
|
|
0.01172757 0.01310897 0.01301908 0.01175666]
|
|
|
|
mean value: 0.012196207046508789
|
|
|
|
key: test_mcc
|
|
value: [1. 0.7200823 0.70710678 0.70710678 0.57735027 0.84515425
|
|
0.84515425 0.84515425 0.70710678 0.4472136 ]
|
|
|
|
mean value: 0.7401429272257871
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.84615385 0.83333333 0.83333333 0.75 0.91666667
|
|
0.91666667 0.91666667 0.83333333 0.66666667]
|
|
|
|
mean value: 0.8512820512820513
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.8 0.8 0.8 0.66666667 0.90909091
|
|
0.90909091 0.90909091 0.8 0.5 ]
|
|
|
|
mean value: 0.8093939393939394
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.66666667 0.66666667 0.66666667 0.5 0.83333333
|
|
0.83333333 0.83333333 0.66666667 0.33333333]
|
|
|
|
mean value: 0.7
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.83333333 0.83333333 0.83333333 0.75 0.91666667
|
|
0.91666667 0.91666667 0.83333333 0.66666667]
|
|
|
|
mean value: 0.85
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.66666667 0.66666667 0.66666667 0.5 0.83333333
|
|
0.83333333 0.83333333 0.66666667 0.33333333]
|
|
|
|
mean value: 0.7
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03332019 0.03971887 0.04168487 0.0413003 0.04168844 0.03435564
|
|
0.0302372 0.03286815 0.01320338 0.01278949]
|
|
|
|
mean value: 0.03211665153503418
|
|
|
|
key: score_time
|
|
value: [0.02243114 0.02310896 0.02241683 0.02215528 0.02265072 0.02237749
|
|
0.019912 0.01178932 0.01181412 0.01143408]
|
|
|
|
mean value: 0.019008994102478027
|
|
|
|
key: test_mcc
|
|
value: [1. 0.6172134 0.70710678 0.84515425 1. 0.70710678
|
|
0.57735027 0.70710678 0.70710678 0.70710678]
|
|
|
|
mean value: 0.7575251829699248
|
|
|
|
key: train_mcc
|
|
value: [0.98181818 0.96329966 0.96427411 0.98198051 0.96427411 0.96427411
|
|
1. 0.98198051 0.96427411 0.96427411]
|
|
|
|
mean value: 0.9730449412912406
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.76923077 0.83333333 0.91666667 1. 0.83333333
|
|
0.75 0.83333333 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8602564102564103
|
|
|
|
key: train_accuracy
|
|
value: [0.99082569 0.98165138 0.98181818 0.99090909 0.98181818 0.98181818
|
|
1. 0.99090909 0.98181818 0.98181818]
|
|
|
|
mean value: 0.9863386155129275
|
|
|
|
key: test_fscore
|
|
value: [1. 0.8 0.8 0.92307692 1. 0.85714286
|
|
0.8 0.85714286 0.85714286 0.85714286]
|
|
|
|
mean value: 0.8751648351648351
|
|
|
|
key: train_fscore
|
|
value: [0.99082569 0.98181818 0.98214286 0.99099099 0.98214286 0.98214286
|
|
1. 0.99099099 0.98214286 0.98214286]
|
|
|
|
mean value: 0.9865340137587844
|
|
|
|
key: test_precision
|
|
value: [1. 0.66666667 1. 0.85714286 1. 0.75
|
|
0.66666667 0.75 0.75 0.75 ]
|
|
|
|
mean value: 0.819047619047619
|
|
|
|
key: train_precision
|
|
value: [0.98181818 0.98181818 0.96491228 0.98214286 0.96491228 0.96491228
|
|
1. 0.98214286 0.96491228 0.96491228]
|
|
|
|
mean value: 0.9752483481430849
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.66666667 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9666666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 0.98181818 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9981818181818182
|
|
|
|
key: /home/tanu/git/LSHTM_analysis/scripts/ml/./gid_8020.py:148: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_8020.py:151: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
test_roc_auc
|
|
value: [1. 0.78571429 0.83333333 0.91666667 1. 0.83333333
|
|
0.75 0.83333333 0.83333333 0.83333333]
|
|
|
|
mean value: 0.861904761904762
|
|
|
|
key: train_roc_auc
|
|
value: [0.99090909 0.98164983 0.98181818 0.99090909 0.98181818 0.98181818
|
|
1. 0.99090909 0.98181818 0.98181818]
|
|
|
|
mean value: 0.9863468013468013
|
|
|
|
key: test_jcc
|
|
value: [1. 0.66666667 0.66666667 0.85714286 1. 0.75
|
|
0.66666667 0.75 0.75 0.75 ]
|
|
|
|
mean value: 0.7857142857142857
|
|
|
|
key: train_jcc
|
|
value: [0.98181818 0.96428571 0.96491228 0.98214286 0.96491228 0.96491228
|
|
1. 0.98214286 0.96491228 0.96491228]
|
|
|
|
mean value: 0.9734951013898382
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.19366241 0.18980885 0.16285515 0.14929509 0.28676248 0.20587111
|
|
0.15684533 0.28551149 0.20380044 0.20302534]
|
|
|
|
mean value: 0.2037437677383423
|
|
|
|
key: score_time
|
|
value: [0.02307987 0.02286029 0.0219593 0.0159812 0.02114606 0.02399015
|
|
0.02007651 0.02140069 0.02042842 0.02103853]
|
|
|
|
mean value: 0.0211961030960083
|
|
|
|
key: test_mcc
|
|
value: [1. 0.6172134 0.70710678 0.84515425 1. 0.70710678
|
|
0.57735027 0.50709255 0.70710678 0.70710678]
|
|
|
|
mean value: 0.737523760134981
|
|
|
|
key: train_mcc
|
|
value: [0.98181818 0.96329966 0.96427411 0.98198051 0.96427411 1.
|
|
1. 0.89090909 0.96427411 0.96427411]
|
|
|
|
mean value: 0.9675103886625406
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.76923077 0.83333333 0.91666667 1. 0.83333333
|
|
0.75 0.75 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8519230769230769
|
|
|
|
key: train_accuracy
|
|
value: [0.99082569 0.98165138 0.98181818 0.99090909 0.98181818 1.
|
|
1. 0.94545455 0.98181818 0.98181818]
|
|
|
|
mean value: 0.9836113427856548
|
|
|
|
key: test_fscore
|
|
value: [1. 0.8 0.8 0.92307692 1. 0.85714286
|
|
0.8 0.76923077 0.85714286 0.85714286]
|
|
|
|
mean value: 0.8663736263736264
|
|
|
|
key: train_fscore
|
|
value: [0.99082569 0.98181818 0.98214286 0.99099099 0.98214286 1.
|
|
1. 0.94545455 0.98214286 0.98214286]
|
|
|
|
mean value: 0.9837660834908541
|
|
|
|
key: test_precision
|
|
value: [1. 0.66666667 1. 0.85714286 1. 0.75
|
|
0.66666667 0.71428571 0.75 0.75 ]
|
|
|
|
mean value: 0.8154761904761905
|
|
|
|
key: train_precision
|
|
value: [0.98181818 0.98181818 0.96491228 0.98214286 0.96491228 1.
|
|
1. 0.94545455 0.96491228 0.96491228]
|
|
|
|
mean value: 0.9750882889040784
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.66666667 1. 1. 1.
|
|
1. 0.83333333 1. 1. ]
|
|
|
|
mean value: 0.95
|
|
|
|
key: train_recall
|
|
value: [1. 0.98181818 1. 1. 1. 1.
|
|
1. 0.94545455 1. 1. ]
|
|
|
|
mean value: 0.9927272727272727
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.78571429 0.83333333 0.91666667 1. 0.83333333
|
|
0.75 0.75 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8535714285714286
|
|
|
|
key: train_roc_auc
|
|
value: [0.99090909 0.98164983 0.98181818 0.99090909 0.98181818 1.
|
|
1. 0.94545455 0.98181818 0.98181818]
|
|
|
|
mean value: 0.9836195286195286
|
|
|
|
key: test_jcc
|
|
value: [1. 0.66666667 0.66666667 0.85714286 1. 0.75
|
|
0.66666667 0.625 0.75 0.75 ]
|
|
|
|
mean value: 0.7732142857142857
|
|
|
|
key: train_jcc
|
|
value: [0.98181818 0.96428571 0.96491228 0.98214286 0.96491228 1.
|
|
1. 0.89655172 0.96491228 0.96491228]
|
|
|
|
mean value: 0.9684447600191701
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03703213 0.02163553 0.02083802 0.02442241 0.02101946 0.0407753
|
|
0.0298636 0.0334065 0.0232501 0.02786827]
|
|
|
|
mean value: 0.028011131286621093
|
|
|
|
key: score_time
|
|
value: [0.01280808 0.01153064 0.01170087 0.01168966 0.02796268 0.01346302
|
|
0.02058768 0.01163387 0.01168513 0.0118463 ]
|
|
|
|
mean value: 0.014490795135498048
|
|
|
|
key: test_mcc
|
|
value: [ 1. 0.54772256 -0.41666667 1. 0.47140452 -0.16666667
|
|
0.75 0.09128709 0. 0.70710678]
|
|
|
|
mean value: 0.39841876190669395
|
|
|
|
key: train_mcc
|
|
value: [0.93649139 1. 1. 0.96774194 0.9344086 0.90204573
|
|
1. 1. 0.96824584 0.96824584]
|
|
|
|
mean value: 0.9677179336168809
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.71428571 0.28571429 1. 0.71428571 0.42857143
|
|
0.85714286 0.57142857 0.5 0.83333333]
|
|
|
|
mean value: 0.6904761904761905
|
|
|
|
key: train_accuracy
|
|
value: [0.96721311 1. 1. 0.98360656 0.96721311 0.95081967
|
|
1. 1. 0.98387097 0.98387097]
|
|
|
|
mean value: 0.9836594394500264
|
|
|
|
key: test_fscore
|
|
value: [1. 0.75 0.28571429 1. 0.8 0.5
|
|
0.85714286 0.66666667 0.4 0.85714286]
|
|
|
|
mean value: 0.7116666666666667
|
|
|
|
key: train_fscore
|
|
value: [0.96666667 1. 1. 0.98360656 0.96666667 0.94915254
|
|
1. 1. 0.98360656 0.98360656]
|
|
|
|
mean value: 0.9833305547837362
|
|
|
|
key: test_precision
|
|
value: [1. 0.6 0.25 1. 0.66666667 0.5
|
|
1. 0.6 0.5 0.75 ]
|
|
|
|
mean value: 0.6866666666666666
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 0.96666667 0.96551724
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9932183908045977
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.33333333 1. 1. 0.5
|
|
0.75 0.75 0.33333333 1. ]
|
|
|
|
mean value: 0.7666666666666666
|
|
|
|
key: train_recall
|
|
value: [0.93548387 1. 1. 0.96774194 0.96666667 0.93333333
|
|
1. 1. 0.96774194 0.96774194]
|
|
|
|
mean value: 0.9738709677419355
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.75 0.29166667 1. 0.66666667 0.41666667
|
|
0.875 0.54166667 0.5 0.83333333]
|
|
|
|
mean value: 0.6875
|
|
|
|
key: train_roc_auc
|
|
value: [0.96774194 1. 1. 0.98387097 0.9672043 0.95053763
|
|
1. 1. 0.98387097 0.98387097]
|
|
|
|
mean value: 0.9837096774193549
|
|
|
|
key: test_jcc
|
|
value: [1. 0.6 0.16666667 1. 0.66666667 0.33333333
|
|
0.75 0.5 0.25 0.75 ]
|
|
|
|
mean value: 0.6016666666666667
|
|
|
|
key: train_jcc
|
|
value: [0.93548387 1. 1. 0.96774194 0.93548387 0.90322581
|
|
1. 1. 0.96774194 0.96774194]
|
|
|
|
mean value: 0.967741935483871
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.3757937 0.35823274 0.30778146 0.37675571 0.34572625 0.34817052
|
|
0.32510233 0.33132529 0.44610596 0.44800591]
|
|
|
|
mean value: 0.36629998683929443
|
|
|
|
key: score_time
|
|
value: [0.01195908 0.01186824 0.01176453 0.01182771 0.01213813 0.01193666
|
|
0.01182008 0.01173067 0.01298165 0.01300216]
|
|
|
|
mean value: 0.012102890014648437
|
|
|
|
key: test_mcc
|
|
value: [ 1. 0.54772256 -0.16666667 0.73029674 0.47140452 0.41666667
|
|
0.75 -0.16666667 0.4472136 0.70710678]
|
|
|
|
mean value: 0.4737077531656258
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.71428571 0.42857143 0.85714286 0.71428571 0.71428571
|
|
0.85714286 0.42857143 0.66666667 0.83333333]
|
|
|
|
mean value: 0.7214285714285714
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.75 0.33333333 0.8 0.8 0.75
|
|
0.85714286 0.5 0.5 0.85714286]
|
|
|
|
mean value: 0.7147619047619047
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.6 0.33333333 1. 0.66666667 0.75
|
|
1. 0.5 1. 0.75 ]
|
|
|
|
mean value: 0.76
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.33333333 0.66666667 1. 0.75
|
|
0.75 0.5 0.33333333 1. ]
|
|
|
|
mean value: 0.7333333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.75 0.41666667 0.83333333 0.66666667 0.70833333
|
|
0.875 0.41666667 0.66666667 0.83333333]
|
|
|
|
mean value: 0.7166666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.6 0.2 0.66666667 0.66666667 0.6
|
|
0.75 0.33333333 0.33333333 0.75 ]
|
|
|
|
mean value: 0.59
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0119493 0.01030326 0.00839663 0.00916123 0.00806046 0.00806236
|
|
0.00808382 0.00821805 0.00808907 0.00828099]
|
|
|
|
mean value: 0.008860516548156738
|
|
|
|
key: score_time
|
|
value: [0.01362109 0.00873494 0.00842834 0.00891542 0.00822663 0.00823951
|
|
0.00822091 0.00823236 0.00822186 0.00827575]
|
|
|
|
mean value: 0.008911681175231934
|
|
|
|
key: test_mcc
|
|
value: [-0.09128709 -0.09128709 -0.09128709 0.41666667 0.41666667 0.73029674
|
|
0.41666667 0.16666667 0. 0.70710678]
|
|
|
|
mean value: 0.2580208912440853
|
|
|
|
key: train_mcc
|
|
value: [0.54153466 0.64178842 0.67384323 0.68707295 0.61975541 0.67858574
|
|
0.75310667 0.54654832 0.63439154 0.61807005]
|
|
|
|
mean value: 0.6394696981275051
|
|
|
|
key: test_accuracy
|
|
value: [0.42857143 0.42857143 0.42857143 0.71428571 0.71428571 0.85714286
|
|
0.71428571 0.57142857 0.5 0.83333333]
|
|
|
|
mean value: 0.6190476190476191
|
|
|
|
key: train_accuracy
|
|
value: [0.7704918 0.81967213 0.83606557 0.81967213 0.80327869 0.83606557
|
|
0.86885246 0.7704918 0.80645161 0.80645161]
|
|
|
|
mean value: 0.8137493389740877
|
|
|
|
key: test_fscore
|
|
value: [0.5 0.5 0.5 0.66666667 0.75 0.88888889
|
|
0.75 0.57142857 0.4 0.85714286]
|
|
|
|
mean value: 0.6384126984126984
|
|
|
|
key: train_fscore
|
|
value: [0.78125 0.83076923 0.83333333 0.78431373 0.81818182 0.84375
|
|
0.87878788 0.78125 0.82857143 0.79310345]
|
|
|
|
mean value: 0.8173310863409748
|
|
|
|
key: test_precision
|
|
value: [0.4 0.4 0.4 0.66666667 0.75 0.8
|
|
0.75 0.66666667 0.5 0.75 ]
|
|
|
|
mean value: 0.6083333333333334
|
|
|
|
key: train_precision
|
|
value: [0.75757576 0.79411765 0.86206897 1. 0.75 0.79411765
|
|
0.80555556 0.73529412 0.74358974 0.85185185]
|
|
|
|
mean value: 0.8094171285854855
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.66666667 0.66666667 0.66666667 0.75 1.
|
|
0.75 0.5 0.33333333 1. ]
|
|
|
|
mean value: 0.7
|
|
|
|
key: train_recall
|
|
value: [0.80645161 0.87096774 0.80645161 0.64516129 0.9 0.9
|
|
0.96666667 0.83333333 0.93548387 0.74193548]
|
|
|
|
mean value: 0.8406451612903225
|
|
|
|
key: test_roc_auc
|
|
value: [0.45833333 0.45833333 0.45833333 0.70833333 0.70833333 0.83333333
|
|
0.70833333 0.58333333 0.5 0.83333333]
|
|
|
|
mean value: 0.625
|
|
|
|
key: train_roc_auc
|
|
value: [0.76989247 0.8188172 0.83655914 0.82258065 0.80483871 0.83709677
|
|
0.87043011 0.77150538 0.80645161 0.80645161]
|
|
|
|
mean value: 0.8144623655913978
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 0.33333333 0.33333333 0.5 0.6 0.8
|
|
0.6 0.4 0.25 0.75 ]
|
|
|
|
mean value: 0.49
|
|
|
|
key: train_jcc
|
|
value: [0.64102564 0.71052632 0.71428571 0.64516129 0.69230769 0.72972973
|
|
0.78378378 0.64102564 0.70731707 0.65714286]
|
|
|
|
mean value: 0.6922305738583845
|
|
|
|
MCC on Blind test: 0.43
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0083766 0.00832438 0.00838995 0.00863719 0.00825953 0.00824761
|
|
0.00836039 0.00815797 0.00819683 0.00821209]
|
|
|
|
mean value: 0.008316254615783692
|
|
|
|
key: score_time
|
|
value: [0.00845933 0.00820565 0.00836563 0.00838375 0.00821686 0.008461
|
|
0.00826406 0.00826144 0.00818753 0.00823259]
|
|
|
|
mean value: 0.00830378532409668
|
|
|
|
key: test_mcc
|
|
value: [ 0.16666667 0.54772256 -0.41666667 0.75 0.09128709 0.41666667
|
|
0.09128709 0.16666667 -0.33333333 0. ]
|
|
|
|
mean value: 0.14802967433402214
|
|
|
|
key: train_mcc
|
|
value: [0.70537634 0.67314268 0.73763441 0.74352218 0.77072165 0.67384323
|
|
0.77382584 0.77096774 0.71004695 0.71004695]
|
|
|
|
mean value: 0.726912797929234
|
|
|
|
key: test_accuracy
|
|
value: [0.57142857 0.71428571 0.28571429 0.85714286 0.57142857 0.71428571
|
|
0.57142857 0.57142857 0.33333333 0.5 ]
|
|
|
|
mean value: 0.569047619047619
|
|
|
|
key: train_accuracy
|
|
value: [0.85245902 0.83606557 0.86885246 0.86885246 0.8852459 0.83606557
|
|
0.8852459 0.8852459 0.85483871 0.85483871]
|
|
|
|
mean value: 0.8627710206240085
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 0.75 0.28571429 0.85714286 0.66666667 0.75
|
|
0.66666667 0.57142857 0.33333333 0.4 ]
|
|
|
|
mean value: 0.5852380952380952
|
|
|
|
key: train_fscore
|
|
value: [0.85245902 0.84375 0.87096774 0.87878788 0.88135593 0.83870968
|
|
0.87719298 0.8852459 0.85245902 0.85714286]
|
|
|
|
mean value: 0.8638071004371335
|
|
|
|
key: test_precision
|
|
value: [0.5 0.6 0.25 0.75 0.6 0.75
|
|
0.6 0.66666667 0.33333333 0.5 ]
|
|
|
|
mean value: 0.5549999999999999
|
|
|
|
key: train_precision
|
|
value: [0.86666667 0.81818182 0.87096774 0.82857143 0.89655172 0.8125
|
|
0.92592593 0.87096774 0.86666667 0.84375 ]
|
|
|
|
mean value: 0.8600749714021405
|
|
|
|
key: test_recall
|
|
value: [0.66666667 1. 0.33333333 1. 0.75 0.75
|
|
0.75 0.5 0.33333333 0.33333333]
|
|
|
|
mean value: 0.6416666666666666
|
|
|
|
key: train_recall
|
|
value: [0.83870968 0.87096774 0.87096774 0.93548387 0.86666667 0.86666667
|
|
0.83333333 0.9 0.83870968 0.87096774]
|
|
|
|
mean value: 0.869247311827957
|
|
|
|
key: test_roc_auc
|
|
value: [0.58333333 0.75 0.29166667 0.875 0.54166667 0.70833333
|
|
0.54166667 0.58333333 0.33333333 0.5 ]
|
|
|
|
mean value: 0.5708333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.85268817 0.83548387 0.8688172 0.86774194 0.88494624 0.83655914
|
|
0.8844086 0.88548387 0.85483871 0.85483871]
|
|
|
|
mean value: 0.8625806451612904
|
|
|
|
key: test_jcc
|
|
value: [0.4 0.6 0.16666667 0.75 0.5 0.6
|
|
0.5 0.4 0.2 0.25 ]
|
|
|
|
mean value: 0.43666666666666665
|
|
|
|
key: train_jcc
|
|
value: [0.74285714 0.72972973 0.77142857 0.78378378 0.78787879 0.72222222
|
|
0.78125 0.79411765 0.74285714 0.75 ]
|
|
|
|
mean value: 0.7606125027816204
|
|
|
|
MCC on Blind test: -0.09
|
|
|
|
Accuracy on Blind test: 0.46
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00827742 0.00793862 0.00854897 0.00954938 0.00971651 0.00933433
|
|
0.009269 0.00863719 0.00904822 0.00925064]
|
|
|
|
mean value: 0.00895702838897705
|
|
|
|
key: score_time
|
|
value: [0.01426268 0.01385069 0.01033854 0.01050949 0.01007295 0.00998449
|
|
0.01004267 0.00931239 0.00992942 0.01487494]
|
|
|
|
mean value: 0.011317825317382813
|
|
|
|
key: test_mcc
|
|
value: [ 0.41666667 1. -0.41666667 0.73029674 0.41666667 0.54772256
|
|
-0.09128709 0.16666667 0.33333333 0.33333333]
|
|
|
|
mean value: 0.343673220792786
|
|
|
|
key: train_mcc
|
|
value: [0.60733867 0.47828912 0.61256703 0.60645161 0.64178842 0.50860215
|
|
0.60818119 0.63939757 0.67741935 0.54953196]
|
|
|
|
mean value: 0.5929567084149061
|
|
|
|
key: test_accuracy
|
|
value: [0.71428571 1. 0.28571429 0.85714286 0.71428571 0.71428571
|
|
0.42857143 0.57142857 0.66666667 0.66666667]
|
|
|
|
mean value: 0.6619047619047619
|
|
|
|
key: train_accuracy
|
|
value: [0.80327869 0.73770492 0.80327869 0.80327869 0.81967213 0.75409836
|
|
0.80327869 0.81967213 0.83870968 0.77419355]
|
|
|
|
mean value: 0.7957165520888418
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 1. 0.28571429 0.8 0.75 0.66666667
|
|
0.33333333 0.57142857 0.66666667 0.66666667]
|
|
|
|
mean value: 0.6407142857142857
|
|
|
|
key: train_fscore
|
|
value: [0.8125 0.75757576 0.79310345 0.80645161 0.80701754 0.75409836
|
|
0.80645161 0.81355932 0.83870968 0.78125 ]
|
|
|
|
mean value: 0.7970717335626711
|
|
|
|
key: test_precision
|
|
value: [0.66666667 1. 0.25 1. 0.75 1.
|
|
0.5 0.66666667 0.66666667 0.66666667]
|
|
|
|
mean value: 0.7166666666666667
|
|
|
|
key: train_precision
|
|
value: [0.78787879 0.71428571 0.85185185 0.80645161 0.85185185 0.74193548
|
|
0.78125 0.82758621 0.83870968 0.75757576]
|
|
|
|
mean value: 0.7959376944534063
|
|
|
|
key: test_recall
|
|
value: [0.66666667 1. 0.33333333 0.66666667 0.75 0.5
|
|
0.25 0.5 0.66666667 0.66666667]
|
|
|
|
mean value: 0.6
|
|
|
|
key: train_recall
|
|
value: [0.83870968 0.80645161 0.74193548 0.80645161 0.76666667 0.76666667
|
|
0.83333333 0.8 0.83870968 0.80645161]
|
|
|
|
mean value: 0.8005376344086022
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 1. 0.29166667 0.83333333 0.70833333 0.75
|
|
0.45833333 0.58333333 0.66666667 0.66666667]
|
|
|
|
mean value: 0.6666666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.80268817 0.73655914 0.80430108 0.80322581 0.8188172 0.75430108
|
|
0.80376344 0.81935484 0.83870968 0.77419355]
|
|
|
|
mean value: 0.7955913978494624
|
|
|
|
key: test_jcc
|
|
value: [0.5 1. 0.16666667 0.66666667 0.6 0.5
|
|
0.2 0.4 0.5 0.5 ]
|
|
|
|
mean value: 0.5033333333333333
|
|
|
|
key: train_jcc
|
|
value: [0.68421053 0.6097561 0.65714286 0.67567568 0.67647059 0.60526316
|
|
0.67567568 0.68571429 0.72222222 0.64102564]
|
|
|
|
mean value: 0.6633156727463153
|
|
|
|
MCC on Blind test: -0.02
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0113759 0.00972724 0.00855541 0.00846791 0.00845933 0.00856256
|
|
0.00842547 0.00843883 0.00858736 0.00891113]
|
|
|
|
mean value: 0.008951115608215331
|
|
|
|
key: score_time
|
|
value: [0.01047635 0.00906873 0.00828266 0.00831795 0.00829625 0.00824404
|
|
0.00831628 0.00833225 0.00834322 0.00828624]
|
|
|
|
mean value: 0.008596396446228028
|
|
|
|
key: test_mcc
|
|
value: [ 0.41666667 0.75 -0.41666667 0.73029674 0.47140452 0.41666667
|
|
0.41666667 0.16666667 0. 0.70710678]
|
|
|
|
mean value: 0.36588080453178007
|
|
|
|
key: train_mcc
|
|
value: [0.77072165 0.80475071 0.83638369 0.77382584 0.77096774 0.80516731
|
|
0.83655914 0.8688172 0.74193548 0.77784447]
|
|
|
|
mean value: 0.7986973241794544
|
|
|
|
key: test_accuracy
|
|
value: [0.71428571 0.85714286 0.28571429 0.85714286 0.71428571 0.71428571
|
|
0.71428571 0.57142857 0.5 0.83333333]
|
|
|
|
mean value: 0.6761904761904762
|
|
|
|
key: train_accuracy
|
|
value: [0.8852459 0.90163934 0.91803279 0.8852459 0.8852459 0.90163934
|
|
0.91803279 0.93442623 0.87096774 0.88709677]
|
|
|
|
mean value: 0.8987572712850344
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.85714286 0.28571429 0.8 0.8 0.75
|
|
0.75 0.57142857 0.4 0.85714286]
|
|
|
|
mean value: 0.6738095238095239
|
|
|
|
key: train_fscore
|
|
value: [0.88888889 0.90625 0.92063492 0.89230769 0.8852459 0.90322581
|
|
0.91803279 0.93333333 0.87096774 0.89230769]
|
|
|
|
mean value: 0.9011194764384214
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.75 0.25 1. 0.66666667 0.75
|
|
0.75 0.66666667 0.5 0.75 ]
|
|
|
|
mean value: 0.675
|
|
|
|
key: train_precision
|
|
value: [0.875 0.87878788 0.90625 0.85294118 0.87096774 0.875
|
|
0.90322581 0.93333333 0.87096774 0.85294118]
|
|
|
|
mean value: 0.8819414855384969
|
|
|
|
key: test_recall
|
|
value: [0.66666667 1. 0.33333333 0.66666667 1. 0.75
|
|
0.75 0.5 0.33333333 1. ]
|
|
|
|
mean value: 0.7
|
|
|
|
key: train_recall
|
|
value: [0.90322581 0.93548387 0.93548387 0.93548387 0.9 0.93333333
|
|
0.93333333 0.93333333 0.87096774 0.93548387]
|
|
|
|
mean value: 0.9216129032258065
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.875 0.29166667 0.83333333 0.66666667 0.70833333
|
|
0.70833333 0.58333333 0.5 0.83333333]
|
|
|
|
mean value: 0.6708333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.88494624 0.90107527 0.91774194 0.8844086 0.88548387 0.90215054
|
|
0.91827957 0.9344086 0.87096774 0.88709677]
|
|
|
|
mean value: 0.8986559139784946
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.75 0.16666667 0.66666667 0.66666667 0.6
|
|
0.6 0.4 0.25 0.75 ]
|
|
|
|
mean value: 0.535
|
|
|
|
key: train_jcc
|
|
value: [0.8 0.82857143 0.85294118 0.80555556 0.79411765 0.82352941
|
|
0.84848485 0.875 0.77142857 0.80555556]
|
|
|
|
mean value: 0.8205184194890077
|
|
|
|
MCC on Blind test: 0.09
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.32072473 0.41932726 0.46512365 0.29137492 0.34876966 0.34039497
|
|
0.33532476 0.39166927 0.29834557 0.29533124]
|
|
|
|
mean value: 0.3506386041641235
|
|
|
|
key: score_time
|
|
value: [0.0120399 0.01225924 0.01206875 0.01196504 0.01211524 0.01202726
|
|
0.01198578 0.01229382 0.01203632 0.01200294]
|
|
|
|
mean value: 0.012079429626464844
|
|
|
|
key: test_mcc
|
|
value: [ 0.75 0.54772256 -0.09128709 0.41666667 0.47140452 0.09128709
|
|
0.75 0.09128709 0.33333333 0.70710678]
|
|
|
|
mean value: 0.4067520952400273
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.71428571 0.42857143 0.71428571 0.71428571 0.57142857
|
|
0.85714286 0.57142857 0.66666667 0.83333333]
|
|
|
|
mean value: 0.6928571428571428
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.75 0.5 0.66666667 0.8 0.66666667
|
|
0.85714286 0.66666667 0.66666667 0.85714286]
|
|
|
|
mean value: 0.7288095238095238
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.6 0.4 0.66666667 0.66666667 0.6
|
|
1. 0.6 0.66666667 0.75 ]
|
|
|
|
mean value: 0.6699999999999999
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.66666667 0.66666667 1. 0.75
|
|
0.75 0.75 0.66666667 1. ]
|
|
|
|
mean value: 0.825
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.75 0.45833333 0.70833333 0.66666667 0.54166667
|
|
0.875 0.54166667 0.66666667 0.83333333]
|
|
|
|
mean value: 0.6916666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.6 0.33333333 0.5 0.66666667 0.5
|
|
0.75 0.5 0.5 0.75 ]
|
|
|
|
mean value: 0.585
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.07
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01281238 0.01291943 0.00945044 0.01028609 0.01017141 0.00962567
|
|
0.0091424 0.00996327 0.00918722 0.00933814]
|
|
|
|
mean value: 0.010289645195007325
|
|
|
|
key: score_time
|
|
value: [0.011307 0.00959492 0.00866413 0.00930047 0.0089469 0.00817585
|
|
0.00832176 0.00896382 0.00819564 0.00821996]
|
|
|
|
mean value: 0.00896904468536377
|
|
|
|
key: test_mcc
|
|
value: [0.75 0.75 1. 0.73029674 0.73029674 1.
|
|
0.73029674 0.41666667 1. 0.70710678]
|
|
|
|
mean value: 0.7814663677873879
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.85714286 1. 0.85714286 0.85714286 1.
|
|
0.85714286 0.71428571 1. 0.83333333]
|
|
|
|
mean value: 0.8833333333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.85714286 1. 0.8 0.88888889 1.
|
|
0.88888889 0.75 1. 0.85714286]
|
|
|
|
mean value: 0.8899206349206349
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.75 1. 1. 0.8 1. 0.8 0.75 1. 0.75]
|
|
|
|
mean value: 0.86
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.66666667 1. 1.
|
|
1. 0.75 1. 1. ]
|
|
|
|
mean value: 0.9416666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.875 1. 0.83333333 0.83333333 1.
|
|
0.83333333 0.70833333 1. 0.83333333]
|
|
|
|
mean value: 0.8791666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.75 1. 0.66666667 0.8 1.
|
|
0.8 0.6 1. 0.75 ]
|
|
|
|
mean value: 0.8116666666666666
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.83
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.07908535 0.08022213 0.0785079 0.08233738 0.08285427 0.08073807
|
|
0.08253217 0.08468795 0.08145404 0.07984471]
|
|
|
|
mean value: 0.08122639656066895
|
|
|
|
key: score_time
|
|
value: [0.01787519 0.01658177 0.01679277 0.01665854 0.01725149 0.01833034
|
|
0.01661563 0.01819777 0.01673818 0.01664948]
|
|
|
|
mean value: 0.01716911792755127
|
|
|
|
key: test_mcc
|
|
value: [ 0.41666667 0.75 0.16666667 0.75 0.47140452 1.
|
|
0.09128709 -0.09128709 0.4472136 0.33333333]
|
|
|
|
mean value: 0.43352847829576563
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.71428571 0.85714286 0.57142857 0.85714286 0.71428571 1.
|
|
0.57142857 0.42857143 0.66666667 0.66666667]
|
|
|
|
mean value: 0.7047619047619047
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.85714286 0.57142857 0.85714286 0.8 1.
|
|
0.66666667 0.33333333 0.5 0.66666667]
|
|
|
|
mean value: 0.6919047619047619
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.75 0.5 0.75 0.66666667 1.
|
|
0.6 0.5 1. 0.66666667]
|
|
|
|
mean value: 0.71
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 1. 0.66666667 1. 1. 1.
|
|
0.75 0.25 0.33333333 0.66666667]
|
|
|
|
mean value: 0.7333333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.875 0.58333333 0.875 0.66666667 1.
|
|
0.54166667 0.45833333 0.66666667 0.66666667]
|
|
|
|
mean value: 0.7041666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.75 0.4 0.75 0.66666667 1.
|
|
0.5 0.2 0.33333333 0.5 ]
|
|
|
|
mean value: 0.56
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00856638 0.00917745 0.00828481 0.00852895 0.00912547 0.00919986
|
|
0.00925469 0.00916147 0.00940824 0.00939536]
|
|
|
|
mean value: 0.00901026725769043
|
|
|
|
key: score_time
|
|
value: [0.00831723 0.00839233 0.00822401 0.008991 0.00903416 0.00899053
|
|
0.00914025 0.00899673 0.00907063 0.00910521]
|
|
|
|
mean value: 0.008826208114624024
|
|
|
|
key: test_mcc
|
|
value: [-0.09128709 0.41666667 0.41666667 0.73029674 0. -0.09128709
|
|
0.35355339 -0.16666667 -0.4472136 -0.4472136 ]
|
|
|
|
mean value: 0.06735154237651907
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.42857143 0.71428571 0.71428571 0.85714286 0.57142857 0.42857143
|
|
0.57142857 0.42857143 0.33333333 0.33333333]
|
|
|
|
mean value: 0.5380952380952381
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.5 0.66666667 0.66666667 0.8 0.72727273 0.33333333
|
|
0.4 0.5 0. 0.5 ]
|
|
|
|
mean value: 0.5093939393939394
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.4 0.66666667 0.66666667 1. 0.57142857 0.5
|
|
1. 0.5 0. 0.4 ]
|
|
|
|
mean value: 0.5704761904761905
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.66666667 0.66666667 0.66666667 1. 0.25
|
|
0.25 0.5 0. 0.66666667]
|
|
|
|
mean value: 0.5333333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.45833333 0.70833333 0.70833333 0.83333333 0.5 0.45833333
|
|
0.625 0.41666667 0.33333333 0.33333333]
|
|
|
|
mean value: 0.5375
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 0.5 0.5 0.66666667 0.57142857 0.2
|
|
0.25 0.33333333 0. 0.33333333]
|
|
|
|
mean value: 0.3688095238095238
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.98635054 0.97833443 0.97282434 1.00487256 0.97696161 0.97841191
|
|
1.02283835 1.02789211 1.00390172 0.98681951]
|
|
|
|
mean value: 0.9939207077026367
|
|
|
|
key: score_time
|
|
value: [0.08699942 0.08575749 0.08577275 0.08603191 0.08597279 0.0922606
|
|
0.08832407 0.14286637 0.09378266 0.09315681]
|
|
|
|
mean value: 0.0940924882888794
|
|
|
|
key: test_mcc
|
|
value: [ 0.75 0.75 0.16666667 0.73029674 0.73029674 0.75
|
|
0.41666667 -0.09128709 0.70710678 0.70710678]
|
|
|
|
mean value: 0.5616853289469343
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.85714286 0.57142857 0.85714286 0.85714286 0.85714286
|
|
0.71428571 0.42857143 0.83333333 0.83333333]
|
|
|
|
mean value: 0.7666666666666666
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.85714286 0.57142857 0.8 0.88888889 0.85714286
|
|
0.75 0.33333333 0.8 0.8 ]
|
|
|
|
mean value: 0.7515079365079366
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.75 0.5 1. 0.8 1. 0.75 0.5 1. 1. ]
|
|
|
|
mean value: 0.805
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.66666667 0.66666667 1. 0.75
|
|
0.75 0.25 0.66666667 0.66666667]
|
|
|
|
mean value: 0.7416666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.875 0.58333333 0.83333333 0.83333333 0.875
|
|
0.70833333 0.45833333 0.83333333 0.83333333]
|
|
|
|
mean value: 0.7708333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[0.75 0.75 0.4 0.66666667 0.8 0.75
|
|
0.6 0.2 0.66666667 0.66666667]
|
|
|
|
mean value: 0.625
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.64
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.77307272 0.84251928 0.87779164 0.91600084 0.87222672 0.82066631
|
|
0.92097926 0.82905579 0.83898282 0.82245517]
|
|
|
|
mean value: 0.8513750553131103
|
|
|
|
key: score_time
|
|
value: [0.20626283 0.22345376 0.2089684 0.2186265 0.19044161 0.23537469
|
|
0.20170259 0.20364547 0.22588205 0.12932777]
|
|
|
|
mean value: 0.20436856746673585
|
|
|
|
key: test_mcc
|
|
value: [0.54772256 0.75 0.16666667 0.41666667 0.73029674 0.75
|
|
0.41666667 0.16666667 0.70710678 0.70710678]
|
|
|
|
mean value: 0.5358899529885149
|
|
|
|
key: train_mcc
|
|
value: [0.96774194 0.96770777 1. 0.96774194 0.96774194 1.
|
|
1. 0.96770777 1. 0.93548387]
|
|
|
|
mean value: 0.9774125222808338
|
|
|
|
key: test_accuracy
|
|
value: [0.71428571 0.85714286 0.57142857 0.71428571 0.85714286 0.85714286
|
|
0.71428571 0.57142857 0.83333333 0.83333333]
|
|
|
|
mean value: 0.7523809523809524
|
|
|
|
key: train_accuracy
|
|
value: [0.98360656 0.98360656 1. 0.98360656 0.98360656 1.
|
|
1. 0.98360656 1. 0.96774194]
|
|
|
|
mean value: 0.9885774722369117
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.85714286 0.57142857 0.66666667 0.88888889 0.85714286
|
|
0.75 0.57142857 0.8 0.85714286]
|
|
|
|
mean value: 0.756984126984127
|
|
|
|
key: train_fscore
|
|
value: [0.98360656 0.98412698 1. 0.98360656 0.98360656 1.
|
|
1. 0.98305085 1. 0.96774194]
|
|
|
|
mean value: 0.988573943919963
|
|
|
|
key: test_precision
|
|
value: [0.6 0.75 0.5 0.66666667 0.8 1.
|
|
0.75 0.66666667 1. 0.75 ]
|
|
|
|
mean value: 0.7483333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 0.96875 1. 1. 0.96774194 1.
|
|
1. 1. 1. 0.96774194]
|
|
|
|
mean value: 0.9904233870967742
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.66666667 0.66666667 1. 0.75
|
|
0.75 0.5 0.66666667 1. ]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_recall
|
|
value: [0.96774194 1. 1. 0.96774194 1. 1.
|
|
1. 0.96666667 1. 0.96774194]
|
|
|
|
mean value: 0.986989247311828
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.875 0.58333333 0.70833333 0.83333333 0.875
|
|
0.70833333 0.58333333 0.83333333 0.83333333]
|
|
|
|
mean value: 0.7583333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.98387097 0.98333333 1. 0.98387097 0.98387097 1.
|
|
1. 0.98333333 1. 0.96774194]
|
|
|
|
mean value: 0.9886021505376344
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.75 0.4 0.5 0.8 0.75
|
|
0.6 0.4 0.66666667 0.75 ]
|
|
|
|
mean value: 0.6216666666666667
|
|
|
|
key: train_jcc
|
|
value: [0.96774194 0.96875 1. 0.96774194 0.96774194 1.
|
|
1. 0.96666667 1. 0.9375 ]
|
|
|
|
mean value: 0.977614247311828
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.021029 0.00869584 0.00927711 0.00914264 0.00853944 0.00947785
|
|
0.0094018 0.00912237 0.00871825 0.00887489]
|
|
|
|
mean value: 0.01022791862487793
|
|
|
|
key: score_time
|
|
value: [0.01159883 0.00876713 0.00924754 0.00836539 0.00869107 0.00915337
|
|
0.00886583 0.0088799 0.00834179 0.0085423 ]
|
|
|
|
mean value: 0.00904531478881836
|
|
|
|
key: test_mcc
|
|
value: [ 0.16666667 0.54772256 -0.41666667 0.75 0.09128709 0.41666667
|
|
0.09128709 0.16666667 -0.33333333 0. ]
|
|
|
|
mean value: 0.14802967433402214
|
|
|
|
key: train_mcc
|
|
value: [0.70537634 0.67314268 0.73763441 0.74352218 0.77072165 0.67384323
|
|
0.77382584 0.77096774 0.71004695 0.71004695]
|
|
|
|
mean value: 0.726912797929234
|
|
|
|
key: test_accuracy
|
|
value: [0.57142857 0.71428571 0.28571429 0.85714286 0.57142857 0.71428571
|
|
0.57142857 0.57142857 0.33333333 0.5 ]
|
|
|
|
mean value: 0.569047619047619
|
|
|
|
key: train_accuracy
|
|
value: [0.85245902 0.83606557 0.86885246 0.86885246 0.8852459 0.83606557
|
|
0.8852459 0.8852459 0.85483871 0.85483871]
|
|
|
|
mean value: 0.8627710206240085
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 0.75 0.28571429 0.85714286 0.66666667 0.75
|
|
0.66666667 0.57142857 0.33333333 0.4 ]
|
|
|
|
mean value: 0.5852380952380952
|
|
|
|
key: train_fscore
|
|
value: [0.85245902 0.84375 0.87096774 0.87878788 0.88135593 0.83870968
|
|
0.87719298 0.8852459 0.85245902 0.85714286]
|
|
|
|
mean value: 0.8638071004371335
|
|
|
|
key: test_precision
|
|
value: [0.5 0.6 0.25 0.75 0.6 0.75
|
|
0.6 0.66666667 0.33333333 0.5 ]
|
|
|
|
mean value: 0.5549999999999999
|
|
|
|
key: train_precision
|
|
value: [0.86666667 0.81818182 0.87096774 0.82857143 0.89655172 0.8125
|
|
0.92592593 0.87096774 0.86666667 0.84375 ]
|
|
|
|
mean value: 0.8600749714021405
|
|
|
|
key: test_recall
|
|
value: [0.66666667 1. 0.33333333 1. 0.75 0.75
|
|
0.75 0.5 0.33333333 0.33333333]
|
|
|
|
mean value: 0.6416666666666666
|
|
|
|
key: train_recall
|
|
value: [0.83870968 0.87096774 0.87096774 0.93548387 0.86666667 0.86666667
|
|
0.83333333 0.9 0.83870968 0.87096774]
|
|
|
|
mean value: 0.869247311827957
|
|
|
|
key: test_roc_auc
|
|
value: [0.58333333 0.75 0.29166667 0.875 0.54166667 0.70833333
|
|
0.54166667 0.58333333 0.33333333 0.5 ]
|
|
|
|
mean value: 0.5708333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.85268817 0.83548387 0.8688172 0.86774194 0.88494624 0.83655914
|
|
0.8844086 0.88548387 0.85483871 0.85483871]
|
|
|
|
mean value: 0.8625806451612904
|
|
|
|
key: test_jcc
|
|
value: [0.4 0.6 0.16666667 0.75 0.5 0.6
|
|
0.5 0.4 0.2 0.25 ]
|
|
|
|
mean value: 0.43666666666666665
|
|
|
|
key: train_jcc
|
|
value: [0.74285714 0.72972973 0.77142857 0.78378378 0.78787879 0.72222222
|
|
0.78125 0.79411765 0.74285714 0.75 ]
|
|
|
|
mean value: 0.7606125027816204
|
|
|
|
MCC on Blind test: -0.09
|
|
|
|
Accuracy on Blind test: 0.46
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.04466558 0.03600001 0.06054211 0.03494573 0.03621554 0.03633571
|
|
0.08454704 0.03263474 0.0322969 0.03395486]
|
|
|
|
mean value: 0.043213820457458495
|
|
|
|
key: score_time
|
|
value: [0.01055193 0.01139474 0.0106864 0.01038241 0.0106504 0.0108726
|
|
0.01109147 0.01123834 0.01015639 0.01117992]
|
|
|
|
mean value: 0.010820460319519044
|
|
|
|
key: test_mcc
|
|
value: [0.75 1. 1. 0.73029674 1. 1.
|
|
1. 0.41666667 1. 1. ]
|
|
|
|
mean value: 0.8896963410006888
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 1. 1. 0.85714286 1. 1.
|
|
1. 0.71428571 1. 1. ]
|
|
|
|
mean value: 0.9428571428571428
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 1. 1. 0.8 1. 1.
|
|
1. 0.75 1. 1. ]
|
|
|
|
mean value: 0.9407142857142857
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 1. 1. 1. 1. 1. 1. 0.75 1. 1. ]
|
|
|
|
mean value: 0.95
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.66666667 1. 1.
|
|
1. 0.75 1. 1. ]
|
|
|
|
mean value: 0.9416666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 1. 1. 0.83333333 1. 1.
|
|
1. 0.70833333 1. 1. ]
|
|
|
|
mean value: 0.9416666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 1. 1. 0.66666667 1. 1.
|
|
1. 0.6 1. 1. ]
|
|
|
|
mean value: 0.9016666666666666
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02066708 0.04462981 0.03472829 0.03227282 0.06287384 0.05817699
|
|
0.04753828 0.02436757 0.03686118 0.03724122]
|
|
|
|
mean value: 0.03993570804595947
|
|
|
|
key: score_time
|
|
value: [0.02117872 0.0186491 0.01912355 0.02146792 0.02823567 0.02956176
|
|
0.01161933 0.01144695 0.02165031 0.02292633]
|
|
|
|
mean value: 0.020585966110229493
|
|
|
|
key: test_mcc
|
|
value: [0.54772256 0.41666667 0.73029674 0.47140452 0. 1.
|
|
0.41666667 0.41666667 0.70710678 0.70710678]
|
|
|
|
mean value: 0.5413637384009514
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.96770777 1. 1. 1. ]
|
|
|
|
mean value: 0.9967707772694492
|
|
|
|
key: test_accuracy
|
|
value: [0.71428571 0.71428571 0.85714286 0.71428571 0.57142857 1.
|
|
0.71428571 0.71428571 0.83333333 0.83333333]
|
|
|
|
mean value: 0.7666666666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.98360656 1. 1. 1. ]
|
|
|
|
mean value: 0.9983606557377049
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.66666667 0.8 0.5 0.72727273 1.
|
|
0.75 0.75 0.85714286 0.85714286]
|
|
|
|
mean value: 0.7658225108225108
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.98305085 1. 1. 1. ]
|
|
|
|
mean value: 0.9983050847457627
|
|
|
|
key: test_precision
|
|
value: [0.6 0.66666667 1. 1. 0.57142857 1.
|
|
0.75 0.75 0.75 0.75 ]
|
|
|
|
mean value: 0.7838095238095238
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.66666667 0.66666667 0.33333333 1. 1.
|
|
0.75 0.75 1. 1. ]
|
|
|
|
mean value: 0.8166666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.96666667 1. 1. 1. ]
|
|
|
|
mean value: 0.9966666666666667
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.70833333 0.83333333 0.66666667 0.5 1.
|
|
0.70833333 0.70833333 0.83333333 0.83333333]
|
|
|
|
mean value: 0.7541666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.98333333 1. 1. 1. ]
|
|
|
|
mean value: 0.9983333333333333
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.5 0.66666667 0.33333333 0.57142857 1.
|
|
0.6 0.6 0.75 0.75 ]
|
|
|
|
mean value: 0.6371428571428571
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.96666667 1. 1. 1. ]
|
|
|
|
mean value: 0.9966666666666667
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01754665 0.00957179 0.00943947 0.00904679 0.00934386 0.00924897
|
|
0.00916815 0.00947618 0.0093255 0.0092876 ]
|
|
|
|
mean value: 0.01014549732208252
|
|
|
|
key: score_time
|
|
value: [0.00894094 0.00956678 0.0091722 0.00872302 0.00914335 0.00920105
|
|
0.00918245 0.00917149 0.00912499 0.00905776]
|
|
|
|
mean value: 0.009128403663635255
|
|
|
|
key: test_mcc
|
|
value: [ 0.16666667 0.75 -0.47140452 0.41666667 0.09128709 0.41666667
|
|
0.41666667 0.16666667 0. 0. ]
|
|
|
|
mean value: 0.19532159054598294
|
|
|
|
key: train_mcc
|
|
value: [0.41198086 0.47573065 0.60733867 0.44301075 0.57373553 0.57419355
|
|
0.50807349 0.34553303 0.45760432 0.42289003]
|
|
|
|
mean value: 0.48200908795961867
|
|
|
|
key: test_accuracy
|
|
value: [0.57142857 0.85714286 0.28571429 0.71428571 0.57142857 0.71428571
|
|
0.71428571 0.57142857 0.5 0.5 ]
|
|
|
|
mean value: 0.6
|
|
|
|
key: train_accuracy
|
|
value: [0.70491803 0.73770492 0.80327869 0.72131148 0.78688525 0.78688525
|
|
0.75409836 0.67213115 0.72580645 0.70967742]
|
|
|
|
mean value: 0.740269698572184
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 0.85714286 0.44444444 0.66666667 0.66666667 0.75
|
|
0.75 0.57142857 0.4 0.66666667]
|
|
|
|
mean value: 0.6344444444444445
|
|
|
|
key: train_fscore
|
|
value: [0.72727273 0.75 0.8125 0.72131148 0.77966102 0.78688525
|
|
0.74576271 0.67741935 0.74626866 0.72727273]
|
|
|
|
mean value: 0.7474353916225617
|
|
|
|
key: test_precision
|
|
value: [0.5 0.75 0.33333333 0.66666667 0.6 0.75
|
|
0.75 0.66666667 0.5 0.5 ]
|
|
|
|
mean value: 0.6016666666666667
|
|
|
|
key: train_precision
|
|
value: [0.68571429 0.72727273 0.78787879 0.73333333 0.79310345 0.77419355
|
|
0.75862069 0.65625 0.69444444 0.68571429]
|
|
|
|
mean value: 0.7296525550675995
|
|
|
|
key: test_recall
|
|
value: [0.66666667 1. 0.66666667 0.66666667 0.75 0.75
|
|
0.75 0.5 0.33333333 1. ]
|
|
|
|
mean value: 0.7083333333333334
|
|
|
|
key: train_recall
|
|
value: [0.77419355 0.77419355 0.83870968 0.70967742 0.76666667 0.8
|
|
0.73333333 0.7 0.80645161 0.77419355]
|
|
|
|
mean value: 0.7677419354838709
|
|
|
|
key: test_roc_auc
|
|
value: [0.58333333 0.875 0.33333333 0.70833333 0.54166667 0.70833333
|
|
0.70833333 0.58333333 0.5 0.5 ]
|
|
|
|
mean value: 0.6041666666666666
|
|
|
|
key: train_roc_auc
|
|
value: [0.70376344 0.73709677 0.80268817 0.72150538 0.78655914 0.78709677
|
|
0.75376344 0.67258065 0.72580645 0.70967742]
|
|
|
|
mean value: 0.7400537634408602
|
|
|
|
key: test_jcc
|
|
value: [0.4 0.75 0.28571429 0.5 0.5 0.6
|
|
0.6 0.4 0.25 0.5 ]
|
|
|
|
mean value: 0.4785714285714286
|
|
|
|
key: train_jcc
|
|
value: [0.57142857 0.6 0.68421053 0.56410256 0.63888889 0.64864865
|
|
0.59459459 0.51219512 0.5952381 0.57142857]
|
|
|
|
mean value: 0.5980735582596943
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00914431 0.01255655 0.01267147 0.01248646 0.01289606 0.01275468
|
|
0.01270366 0.01270175 0.01294661 0.01282668]
|
|
|
|
mean value: 0.01236882209777832
|
|
|
|
key: score_time
|
|
value: [0.00824523 0.01128888 0.01128697 0.0112443 0.01135373 0.0112834
|
|
0.01130748 0.01132607 0.01127195 0.01127648]
|
|
|
|
mean value: 0.010988450050354004
|
|
|
|
key: test_mcc
|
|
value: [ 0.75 0.16666667 -0.16666667 0.73029674 0.47140452 0.16666667
|
|
0.75 0.09128709 0.33333333 0.70710678]
|
|
|
|
mean value: 0.40000951382353284
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 0.93649139 1. 0.96770777
|
|
1. 1. 1. 0.96824584]
|
|
|
|
mean value: 0.9872445002176499
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.57142857 0.42857143 0.85714286 0.71428571 0.57142857
|
|
0.85714286 0.57142857 0.66666667 0.83333333]
|
|
|
|
mean value: 0.6928571428571428
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 0.96721311 1. 0.98360656
|
|
1. 1. 1. 0.98387097]
|
|
|
|
mean value: 0.9934690639873083
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.57142857 0.33333333 0.8 0.8 0.57142857
|
|
0.85714286 0.66666667 0.66666667 0.85714286]
|
|
|
|
mean value: 0.6980952380952381
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 0.96666667 1. 0.98305085
|
|
1. 1. 1. 0.98412698]
|
|
|
|
mean value: 0.9933844498251277
|
|
|
|
key: test_precision
|
|
value: [0.75 0.5 0.33333333 1. 0.66666667 0.66666667
|
|
1. 0.6 0.66666667 0.75 ]
|
|
|
|
mean value: 0.6933333333333334
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1.
|
|
0.96875]
|
|
|
|
mean value: 0.996875
|
|
|
|
key: test_recall
|
|
value: [1. 0.66666667 0.33333333 0.66666667 1. 0.5
|
|
0.75 0.75 0.66666667 1. ]
|
|
|
|
mean value: 0.7333333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 0.93548387 1. 0.96666667
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9902150537634409
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.58333333 0.41666667 0.83333333 0.66666667 0.58333333
|
|
0.875 0.54166667 0.66666667 0.83333333]
|
|
|
|
mean value: 0.6875
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 0.96774194 1. 0.98333333
|
|
1. 1. 1. 0.98387097]
|
|
|
|
mean value: 0.993494623655914
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.4 0.2 0.66666667 0.66666667 0.4
|
|
0.75 0.5 0.5 0.75 ]
|
|
|
|
mean value: 0.5583333333333333
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 0.93548387 1. 0.96666667
|
|
1. 1. 1. 0.96875 ]
|
|
|
|
mean value: 0.9870900537634408
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01255178 0.01199484 0.01199031 0.01195526 0.01208282 0.01200271
|
|
0.01195097 0.01220036 0.01220107 0.01197052]
|
|
|
|
mean value: 0.012090063095092774
|
|
|
|
key: score_time
|
|
value: [0.01112318 0.01124549 0.01131654 0.01131129 0.01126218 0.01125789
|
|
0.01139069 0.01133347 0.01130652 0.01134586]
|
|
|
|
mean value: 0.011289310455322266
|
|
|
|
key: test_mcc
|
|
value: [ 1. 0.73029674 -0.16666667 0.73029674 0.47140452 0.16666667
|
|
0.75 -0.09128709 0. 0.4472136 ]
|
|
|
|
mean value: 0.40379245100539046
|
|
|
|
key: train_mcc
|
|
value: [1. 0.61147387 1. 0.96770777 0.96770777 1.
|
|
0.96774194 1. 1. 0.69006556]
|
|
|
|
mean value: 0.9204696909857347
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.85714286 0.42857143 0.85714286 0.71428571 0.57142857
|
|
0.85714286 0.42857143 0.5 0.66666667]
|
|
|
|
mean value: 0.6880952380952381
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.7704918 1. 0.98360656 0.98360656 1.
|
|
0.98360656 1. 1. 0.82258065]
|
|
|
|
mean value: 0.9543892120571127
|
|
|
|
key: test_fscore
|
|
value: [1. 0.8 0.33333333 0.8 0.8 0.57142857
|
|
0.85714286 0.33333333 0.4 0.75 ]
|
|
|
|
mean value: 0.6645238095238095
|
|
|
|
key: train_fscore
|
|
value: [1. 0.70833333 1. 0.98412698 0.98305085 1.
|
|
0.98360656 1. 1. 0.84931507]
|
|
|
|
mean value: 0.9508432790788144
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.33333333 1. 0.66666667 0.66666667
|
|
1. 0.5 0.5 0.6 ]
|
|
|
|
mean value: 0.7266666666666667
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 0.96875 1. 1.
|
|
0.96774194 1. 1. 0.73809524]
|
|
|
|
mean value: 0.9674587173579109
|
|
|
|
key: test_recall
|
|
value: [1. 0.66666667 0.33333333 0.66666667 1. 0.5
|
|
0.75 0.25 0.33333333 1. ]
|
|
|
|
mean value: 0.65
|
|
|
|
key: train_recall
|
|
value: [1. 0.5483871 1. 1. 0.96666667 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.951505376344086
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.83333333 0.41666667 0.83333333 0.66666667 0.58333333
|
|
0.875 0.45833333 0.5 0.66666667]
|
|
|
|
mean value: 0.6833333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.77419355 1. 0.98333333 0.98333333 1.
|
|
0.98387097 1. 1. 0.82258065]
|
|
|
|
mean value: 0.9547311827956989
|
|
|
|
key: test_jcc
|
|
value: [1. 0.66666667 0.2 0.66666667 0.66666667 0.4
|
|
0.75 0.2 0.25 0.6 ]
|
|
|
|
mean value: 0.54
|
|
|
|
key: train_jcc
|
|
value: [1. 0.5483871 1. 0.96875 0.96666667 1.
|
|
0.96774194 1. 1. 0.73809524]
|
|
|
|
mean value: 0.9189640937019969
|
|
|
|
MCC on Blind test: 0.09
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08898473 0.07871199 0.07991958 0.07561755 0.07638383 0.07624745
|
|
0.07929993 0.0784533 0.07643414 0.07759047]
|
|
|
|
mean value: 0.07876429557800294
|
|
|
|
key: score_time
|
|
value: [0.01559711 0.01427269 0.01584983 0.0143671 0.01565814 0.0156498
|
|
0.01551533 0.01560354 0.01510358 0.01565456]
|
|
|
|
mean value: 0.015327167510986329
|
|
|
|
key: test_mcc
|
|
value: [0.75 0.75 1. 1. 1. 0.75
|
|
0.73029674 0.41666667 1. 1. ]
|
|
|
|
mean value: 0.8396963410006888
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.85714286 1. 1. 1. 0.85714286
|
|
0.85714286 0.71428571 1. 1. ]
|
|
|
|
mean value: 0.9142857142857143
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.85714286 1. 1. 1. 0.85714286
|
|
0.88888889 0.75 1. 1. ]
|
|
|
|
mean value: 0.921031746031746
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.75 1. 1. 1. 1. 0.8 0.75 1. 1. ]
|
|
|
|
mean value: 0.905
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 0.75 1. 0.75 1. 1. ]
|
|
|
|
mean value: 0.95
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.875 1. 1. 1. 0.875
|
|
0.83333333 0.70833333 1. 1. ]
|
|
|
|
mean value: 0.9166666666666666
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.75 1. 1. 1. 0.75 0.8 0.6 1. 1. ]
|
|
|
|
mean value: 0.865
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.73
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02859974 0.03944659 0.04689026 0.03875041 0.05311489 0.04923558
|
|
0.03199697 0.03748631 0.03366971 0.03188276]
|
|
|
|
mean value: 0.039107322692871094
|
|
|
|
key: score_time
|
|
value: [0.02200389 0.03538942 0.03918862 0.03866792 0.03664374 0.03812981
|
|
0.02471709 0.03105736 0.02733564 0.02395558]
|
|
|
|
mean value: 0.031708908081054685
|
|
|
|
key: test_mcc
|
|
value: [0.75 1. 0.73029674 0.73029674 1. 1.
|
|
0.73029674 0.41666667 1. 1. ]
|
|
|
|
mean value: 0.8357556896687331
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 0.96774194 1. 0.93635873 1.
|
|
1. 1. 0.96824584 0.96824584]
|
|
|
|
mean value: 0.9840592341535416
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 1. 0.85714286 0.85714286 1. 1.
|
|
0.85714286 0.71428571 1. 1. ]
|
|
|
|
mean value: 0.9142857142857143
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 0.98360656 1. 0.96721311 1.
|
|
1. 1. 0.98387097 0.98387097]
|
|
|
|
mean value: 0.9918561607615018
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 1. 0.8 0.8 1. 1.
|
|
0.88888889 0.75 1. 1. ]
|
|
|
|
mean value: 0.9096031746031746
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 0.98360656 1. 0.96551724 1.
|
|
1. 1. 0.98360656 0.98360656]
|
|
|
|
mean value: 0.9916336913510457
|
|
|
|
key: test_precision
|
|
value: [0.75 1. 1. 1. 1. 1. 0.8 0.75 1. 1. ]
|
|
|
|
mean value: 0.93
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.66666667 0.66666667 1. 1.
|
|
1. 0.75 1. 1. ]
|
|
|
|
mean value: 0.9083333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 0.96774194 1. 0.93333333 1.
|
|
1. 1. 0.96774194 0.96774194]
|
|
|
|
mean value: 0.9836559139784946
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 1. 0.83333333 0.83333333 1. 1.
|
|
0.83333333 0.70833333 1. 1. ]
|
|
|
|
mean value: 0.9083333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 0.98387097 1. 0.96666667 1.
|
|
1. 1. 0.98387097 0.98387097]
|
|
|
|
mean value: 0.9918279569892473
|
|
|
|
key: test_jcc
|
|
value: [0.75 1. 0.66666667 0.66666667 1. 1.
|
|
0.8 0.6 1. 1. ]
|
|
|
|
mean value: 0.8483333333333334
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 0.96774194 1. 0.93333333 1.
|
|
1. 1. 0.96774194 0.96774194]
|
|
|
|
mean value: 0.9836559139784946
|
|
|
|
MCC on Blind test: 1.0
|
|
|
|
Accuracy on Blind test: 1.0
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01210237 0.01162744 0.01159906 0.01165557 0.01118517 0.01178312
|
|
0.01101565 0.01026011 0.01093006 0.01039171]
|
|
|
|
mean value: 0.011255025863647461
|
|
|
|
key: score_time
|
|
value: [0.00943017 0.00967622 0.00962114 0.00960255 0.00961351 0.00949693
|
|
0.00846124 0.00861764 0.00879622 0.00867271]
|
|
|
|
mean value: 0.009198832511901855
|
|
|
|
key: test_mcc
|
|
value: [ 0.41666667 0.75 -0.09128709 0.73029674 -0.16666667 0.41666667
|
|
0.16666667 0.16666667 0. 0.33333333]
|
|
|
|
mean value: 0.2722342983756027
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.71428571 0.85714286 0.42857143 0.85714286 0.42857143 0.71428571
|
|
0.57142857 0.57142857 0.5 0.66666667]
|
|
|
|
mean value: 0.6309523809523809
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.85714286 0.5 0.8 0.5 0.75
|
|
0.57142857 0.57142857 0.4 0.66666667]
|
|
|
|
mean value: 0.6283333333333333
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.75 0.4 1. 0.5 0.75
|
|
0.66666667 0.66666667 0.5 0.66666667]
|
|
|
|
mean value: 0.6566666666666666
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 1. 0.66666667 0.66666667 0.5 0.75
|
|
0.5 0.5 0.33333333 0.66666667]
|
|
|
|
mean value: 0.625
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.875 0.45833333 0.83333333 0.41666667 0.70833333
|
|
0.58333333 0.58333333 0.5 0.66666667]
|
|
|
|
mean value: 0.6333333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.75 0.33333333 0.66666667 0.33333333 0.6
|
|
0.4 0.4 0.25 0.5 ]
|
|
|
|
mean value: 0.47333333333333333
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.07
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.13077664 0.15219951 0.11640406 0.11471939 0.13631082 0.11819267
|
|
0.1370101 0.11607504 0.14268708 0.13811922]
|
|
|
|
mean value: 0.1302494525909424
|
|
|
|
key: score_time
|
|
value: [0.00923109 0.00910282 0.0091238 0.0090394 0.0092144 0.00904584
|
|
0.00909853 0.0091846 0.0103991 0.00912237]
|
|
|
|
mean value: 0.009256196022033692
|
|
|
|
key: test_mcc
|
|
value: [0.75 1. 1. 0.73029674 0.73029674 1.
|
|
1. 0.41666667 1. 0.70710678]
|
|
|
|
mean value: 0.8334366934533657
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 1. 1. 0.85714286 0.85714286 1.
|
|
1. 0.71428571 1. 0.83333333]
|
|
|
|
mean value: 0.9119047619047619
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 1. 1. 0.8 0.88888889 1.
|
|
1. 0.75 1. 0.85714286]
|
|
|
|
mean value: 0.9153174603174603
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 1. 1. 1. 0.8 1. 1. 0.75 1. 0.75]
|
|
|
|
mean value: 0.905
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.66666667 1. 1.
|
|
1. 0.75 1. 1. ]
|
|
|
|
mean value: 0.9416666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 1. 1. 0.83333333 0.83333333 1.
|
|
1. 0.70833333 1. 0.83333333]
|
|
|
|
mean value: 0.9083333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 1. 1. 0.66666667 0.8 1.
|
|
1. 0.6 1. 0.75 ]
|
|
|
|
mean value: 0.8566666666666667
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.00919342 0.01295781 0.01313233 0.01273346 0.01262474 0.01801252
|
|
0.01392221 0.0140717 0.01392198 0.01428556]
|
|
|
|
mean value: 0.013485574722290039
|
|
|
|
key: score_time
|
|
value: [0.00864458 0.01168108 0.01161385 0.01162672 0.01184559 0.01237297
|
|
0.01233697 0.01219058 0.01253319 0.01231003]
|
|
|
|
mean value: 0.011715555191040039
|
|
|
|
key: test_mcc
|
|
value: [-0.41666667 -0.16666667 0.73029674 -0.16666667 -0.41666667 -0.16666667
|
|
0.35355339 -0.16666667 0.70710678 0.4472136 ]
|
|
|
|
mean value: 0.07381705106200007
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.28571429 0.42857143 0.85714286 0.42857143 0.28571429 0.42857143
|
|
0.57142857 0.42857143 0.83333333 0.66666667]
|
|
|
|
mean value: 0.5214285714285714
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.28571429 0.33333333 0.8 0.33333333 0.28571429 0.5
|
|
0.4 0.5 0.85714286 0.5 ]
|
|
|
|
mean value: 0.4795238095238095
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.25 0.33333333 1. 0.33333333 0.33333333 0.5
|
|
1. 0.5 0.75 1. ]
|
|
|
|
mean value: 0.6
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 0.33333333 0.66666667 0.33333333 0.25 0.5
|
|
0.25 0.5 1. 0.33333333]
|
|
|
|
mean value: 0.45
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.29166667 0.41666667 0.83333333 0.41666667 0.29166667 0.41666667
|
|
0.625 0.41666667 0.83333333 0.66666667]
|
|
|
|
mean value: 0.5208333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.16666667 0.2 0.66666667 0.2 0.16666667 0.33333333
|
|
0.25 0.33333333 0.75 0.33333333]
|
|
|
|
mean value: 0.33999999999999997
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0204258 0.01396132 0.01424479 0.01484466 0.01354003 0.0140686
|
|
0.0132165 0.01330948 0.01319122 0.01318932]
|
|
|
|
mean value: 0.014399170875549316
|
|
|
|
key: score_time
|
|
value: [0.01298738 0.0121665 0.0122776 0.0125823 0.01210737 0.01208949
|
|
0.01241326 0.01231694 0.01210666 0.01194143]
|
|
|
|
mean value: 0.012298893928527833
|
|
|
|
key: test_mcc
|
|
value: [0.75 0.54772256 0.09128709 0.75 0.47140452 0.75
|
|
0.75 0.09128709 0.70710678 0.70710678]
|
|
|
|
mean value: 0.5615914826504348
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.71428571 0.57142857 0.85714286 0.71428571 0.85714286
|
|
0.85714286 0.57142857 0.83333333 0.83333333]
|
|
|
|
mean value: 0.7666666666666666
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.75 0.4 0.85714286 0.8 0.85714286
|
|
0.85714286 0.66666667 0.8 0.85714286]
|
|
|
|
mean value: 0.7702380952380952
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.6 0.5 0.75 0.66666667 1.
|
|
1. 0.6 1. 0.75 ]
|
|
|
|
mean value: 0.7616666666666667
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.33333333 1. 1. 0.75
|
|
0.75 0.75 0.66666667 1. ]
|
|
|
|
mean value: 0.825
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.75 0.54166667 0.875 0.66666667 0.875
|
|
0.875 0.54166667 0.83333333 0.83333333]
|
|
|
|
mean value: 0.7666666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.6 0.25 0.75 0.66666667 0.75
|
|
0.75 0.5 0.66666667 0.75 ]
|
|
|
|
mean value: 0.6433333333333333
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.04
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/git/LSHTM_analysis/scripts/ml/./gid_8020.py:168: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_8020.py:171: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.08883023 0.08182573 0.082582 0.08167529 0.082551 0.08211184
|
|
0.0906992 0.08215833 0.08254099 0.08212376]
|
|
|
|
mean value: 0.08370983600616455
|
|
|
|
key: score_time
|
|
value: [0.01185369 0.01161861 0.01159215 0.01164436 0.01162171 0.01170683
|
|
0.01178646 0.01165295 0.01165652 0.01162028]
|
|
|
|
mean value: 0.011675357818603516
|
|
|
|
key: test_mcc
|
|
value: [ 0.75 0.54772256 0.09128709 0.75 0.47140452 0.54772256
|
|
0.16666667 -0.16666667 0.4472136 0.70710678]
|
|
|
|
mean value: 0.43124571054053973
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.71428571 0.57142857 0.85714286 0.71428571 0.71428571
|
|
0.57142857 0.42857143 0.66666667 0.83333333]
|
|
|
|
mean value: 0.6928571428571428
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.75 0.4 0.85714286 0.8 0.66666667
|
|
0.57142857 0.5 0.5 0.85714286]
|
|
|
|
mean value: 0.675952380952381
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.6 0.5 0.75 0.66666667 1.
|
|
0.66666667 0.5 1. 0.75 ]
|
|
|
|
mean value: 0.7183333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.33333333 1. 1. 0.5
|
|
0.5 0.5 0.33333333 1. ]
|
|
|
|
mean value: 0.7166666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.75 0.54166667 0.875 0.66666667 0.75
|
|
0.58333333 0.41666667 0.66666667 0.83333333]
|
|
|
|
mean value: 0.6958333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.6 0.25 0.75 0.66666667 0.5
|
|
0.4 0.33333333 0.33333333 0.75 ]
|
|
|
|
mean value: 0.5333333333333333
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.04
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02875185 0.0255394 0.02517962 0.02709579 0.02697229 0.02936125
|
|
0.02627015 0.02329493 0.05253077 0.05114675]
|
|
|
|
mean value: 0.031614279747009276
|
|
|
|
key: score_time
|
|
value: [0.0116415 0.01166344 0.01156759 0.01164579 0.0116365 0.01267076
|
|
0.01161218 0.01175666 0.01177502 0.01182175]
|
|
|
|
mean value: 0.011779117584228515
|
|
|
|
key: test_mcc
|
|
value: [0.69047619 0.7200823 0.70710678 0.84515425 0.70710678 0.84515425
|
|
0.70710678 0.84515425 0.57735027 0.84515425]
|
|
|
|
mean value: 0.7489846121962621
|
|
|
|
key: train_mcc
|
|
value: [0.88989899 0.946411 0.8932746 0.9104463 0.96427411 0.94686415
|
|
0.9104463 0.92788641 0.92973479 0.89090909]
|
|
|
|
mean value: 0.9210145747080757
|
|
|
|
key: test_accuracy
|
|
value: [0.84615385 0.84615385 0.83333333 0.91666667 0.83333333 0.91666667
|
|
0.83333333 0.91666667 0.75 0.91666667]
|
|
|
|
mean value: 0.860897435897436
|
|
|
|
key: train_accuracy
|
|
value: [0.94495413 0.97247706 0.94545455 0.95454545 0.98181818 0.97272727
|
|
0.95454545 0.96363636 0.96363636 0.94545455]
|
|
|
|
mean value: 0.9599249374478732
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.875 0.85714286 0.92307692 0.85714286 0.90909091
|
|
0.85714286 0.92307692 0.8 0.90909091]
|
|
|
|
mean value: 0.8744097569097569
|
|
|
|
key: train_fscore
|
|
value: [0.94545455 0.97297297 0.94736842 0.95575221 0.98214286 0.97345133
|
|
0.95575221 0.96428571 0.96491228 0.94545455]
|
|
|
|
mean value: 0.9607547089277411
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.77777778 0.75 0.85714286 0.75 1.
|
|
0.75 0.85714286 0.66666667 1. ]
|
|
|
|
mean value: 0.8242063492063492
|
|
|
|
key: train_precision
|
|
value: [0.94545455 0.94736842 0.91525424 0.93103448 0.96491228 0.94827586
|
|
0.93103448 0.94736842 0.93220339 0.94545455]
|
|
|
|
mean value: 0.9408360668420959
|
|
|
|
key: test_recall
|
|
value: [0.83333333 1. 1. 1. 1. 0.83333333
|
|
1. 1. 1. 0.83333333]
|
|
|
|
mean value: 0.95
|
|
|
|
key: train_recall
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.94545455 1. 0.98181818 0.98181818 1. 1.
|
|
0.98181818 0.98181818 1. 0.94545455]
|
|
|
|
mean value: 0.9818181818181818
|
|
|
|
key: test_roc_auc
|
|
value: [0.8452381 0.83333333 0.83333333 0.91666667 0.83333333 0.91666667
|
|
0.83333333 0.91666667 0.75 0.91666667]
|
|
|
|
mean value: 0.8595238095238096
|
|
|
|
key: train_roc_auc
|
|
value: [0.94494949 0.97272727 0.94545455 0.95454545 0.98181818 0.97272727
|
|
0.95454545 0.96363636 0.96363636 0.94545455]
|
|
|
|
mean value: 0.9599494949494949
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.77777778 0.75 0.85714286 0.75 0.83333333
|
|
0.75 0.85714286 0.66666667 0.83333333]
|
|
|
|
mean value: 0.778968253968254
|
|
|
|
key: train_jcc
|
|
value: [0.89655172 0.94736842 0.9 0.91525424 0.96491228 0.94827586
|
|
0.91525424 0.93103448 0.93220339 0.89655172]
|
|
|
|
mean value: 0.9247406359264614
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.69710684 0.60986543 0.65137029 0.79301047 0.63927317 0.61157417
|
|
0.7155664 0.61273122 0.64011264 0.63945961]
|
|
|
|
mean value: 0.661007022857666
|
|
|
|
key: score_time
|
|
value: [0.01328039 0.01312089 0.01305461 0.01292634 0.01346755 0.0131743
|
|
0.01344585 0.01326036 0.01330876 0.01209259]
|
|
|
|
mean value: 0.013113164901733398
|
|
|
|
key: test_mcc
|
|
value: [1. 0.7200823 0.70710678 0.35355339 0.70710678 0.84515425
|
|
0.84515425 0.84515425 0.70710678 0.84515425]
|
|
|
|
mean value: 0.7575573052890079
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 0.92788641 1. 1. 1.
|
|
1. 0.94561086 1. 0.92788641]
|
|
|
|
mean value: 0.9801383675343656
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.84615385 0.83333333 0.66666667 0.83333333 0.91666667
|
|
0.91666667 0.91666667 0.83333333 0.91666667]
|
|
|
|
mean value: 0.867948717948718
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 0.96363636 1. 1. 1.
|
|
1. 0.97272727 1. 0.96363636]
|
|
|
|
mean value: 0.99
|
|
|
|
key: test_fscore
|
|
value: [1. 0.875 0.85714286 0.71428571 0.85714286 0.90909091
|
|
0.92307692 0.92307692 0.85714286 0.90909091]
|
|
|
|
mean value: 0.882504995004995
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 0.96428571 1. 1. 1.
|
|
1. 0.97297297 1. 0.96428571]
|
|
|
|
mean value: 0.9901544401544401
|
|
|
|
key: test_precision
|
|
value: [1. 0.77777778 0.75 0.625 0.75 1.
|
|
0.85714286 0.85714286 0.75 1. ]
|
|
|
|
mean value: 0.8367063492063492
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.94736842 1. 1. 1.
|
|
1. 0.96428571 1. 0.94736842]
|
|
|
|
mean value: 0.9859022556390977
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.83333333 1. 0.83333333
|
|
1. 1. 1. 0.83333333]
|
|
|
|
mean value: 0.95
|
|
|
|
key: train_recall
|
|
value: [1. 1. 0.98181818 1. 1. 1.
|
|
1. 0.98181818 1. 0.98181818]
|
|
|
|
mean value: 0.9945454545454545
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.83333333 0.83333333 0.66666667 0.83333333 0.91666667
|
|
0.91666667 0.91666667 0.83333333 0.91666667]
|
|
|
|
mean value: 0.8666666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 0.96363636 1. 1. 1.
|
|
1. 0.97272727 1. 0.96363636]
|
|
|
|
mean value: 0.99
|
|
|
|
key: test_jcc
|
|
value: [1. 0.77777778 0.75 0.55555556 0.75 0.83333333
|
|
0.85714286 0.85714286 0.75 0.83333333]
|
|
|
|
mean value: 0.7964285714285715
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 0.93103448 1. 1. 1.
|
|
1. 0.94736842 1. 0.93103448]
|
|
|
|
mean value: 0.9809437386569873
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01254106 0.010252 0.00896001 0.00852346 0.00851083 0.00835586
|
|
0.00845838 0.00857139 0.00846386 0.00846291]
|
|
|
|
mean value: 0.009109973907470703
|
|
|
|
key: score_time
|
|
value: [0.01179528 0.00893307 0.0084362 0.00835443 0.00845909 0.00839114
|
|
0.00832081 0.00839591 0.00828457 0.00838757]
|
|
|
|
mean value: 0.008775806427001953
|
|
|
|
key: test_mcc
|
|
value: [-0.31180478 0.03289758 0.30151134 0. 0. 0.
|
|
0.50709255 0.19245009 0. 0. ]
|
|
|
|
mean value: 0.07221467896615756
|
|
|
|
key: train_mcc
|
|
value: [0.4515871 0.3641034 0.43948995 0.34874292 0.6425396 0.39620291
|
|
0.55290734 0.40032038 0.38393633 0.43328912]
|
|
|
|
mean value: 0.44131190462170933
|
|
|
|
key: test_accuracy
|
|
value: [0.38461538 0.53846154 0.58333333 0.5 0.5 0.5
|
|
0.75 0.58333333 0.5 0.5 ]
|
|
|
|
mean value: 0.533974358974359
|
|
|
|
key: train_accuracy
|
|
value: [0.69724771 0.66055046 0.67272727 0.66363636 0.80909091 0.66363636
|
|
0.77272727 0.68181818 0.67272727 0.69090909]
|
|
|
|
mean value: 0.6985070892410342
|
|
|
|
key: test_fscore
|
|
value: [0.55555556 0.66666667 0.70588235 0.5 0.66666667 0.57142857
|
|
0.76923077 0.66666667 0.625 0.57142857]
|
|
|
|
mean value: 0.6298525820584644
|
|
|
|
key: train_fscore
|
|
value: [0.75912409 0.72180451 0.75 0.71317829 0.832 0.73758865
|
|
0.78991597 0.73684211 0.73134328 0.75 ]
|
|
|
|
mean value: 0.7521796901157152
|
|
|
|
key: test_precision
|
|
value: [0.41666667 0.54545455 0.54545455 0.5 0.5 0.5
|
|
0.71428571 0.55555556 0.5 0.5 ]
|
|
|
|
mean value: 0.5277417027417027
|
|
|
|
key: train_precision
|
|
value: [0.63414634 0.60759494 0.60674157 0.62162162 0.74285714 0.60465116
|
|
0.734375 0.62820513 0.62025316 0.62962963]
|
|
|
|
mean value: 0.6430075700867165
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.85714286 1. 0.5 1. 0.66666667
|
|
0.83333333 0.83333333 0.83333333 0.66666667]
|
|
|
|
mean value: 0.8023809523809524
|
|
|
|
key: train_recall
|
|
value: [0.94545455 0.88888889 0.98181818 0.83636364 0.94545455 0.94545455
|
|
0.85454545 0.89090909 0.89090909 0.92727273]
|
|
|
|
mean value: 0.9107070707070707
|
|
|
|
key: test_roc_auc
|
|
value: [0.41666667 0.51190476 0.58333333 0.5 0.5 0.5
|
|
0.75 0.58333333 0.5 0.5 ]
|
|
|
|
mean value: 0.5345238095238095
|
|
|
|
key: train_roc_auc
|
|
value: [0.69494949 0.66262626 0.67272727 0.66363636 0.80909091 0.66363636
|
|
0.77272727 0.68181818 0.67272727 0.69090909]
|
|
|
|
mean value: 0.6984848484848485
|
|
|
|
key: test_jcc
|
|
value: [0.38461538 0.5 0.54545455 0.33333333 0.5 0.4
|
|
0.625 0.5 0.45454545 0.4 ]
|
|
|
|
mean value: 0.4642948717948718
|
|
|
|
key: train_jcc
|
|
value: [0.61176471 0.56470588 0.6 0.55421687 0.71232877 0.58426966
|
|
0.65277778 0.58333333 0.57647059 0.6 ]
|
|
|
|
mean value: 0.6039867585096215
|
|
|
|
MCC on Blind test: 0.31
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01013207 0.00957036 0.00958872 0.00961208 0.00978279 0.00891852
|
|
0.00905418 0.00969553 0.00970125 0.00977802]
|
|
|
|
mean value: 0.009583353996276855
|
|
|
|
key: score_time
|
|
value: [0.00956488 0.00915766 0.00923085 0.00923824 0.00924587 0.00869727
|
|
0.00906658 0.00926638 0.00929189 0.00931239]
|
|
|
|
mean value: 0.009207201004028321
|
|
|
|
key: test_mcc
|
|
value: [ 0.21957752 0.53674504 0.33333333 0.16903085 0.50709255 -0.19245009
|
|
0.35355339 0. 0.57735027 0.4472136 ]
|
|
|
|
mean value: 0.2951446459204242
|
|
|
|
key: train_mcc
|
|
value: [0.55959596 0.59817936 0.69378191 0.74545455 0.62075223 0.6401844
|
|
0.49090909 0.6015931 0.65465367 0.56400939]
|
|
|
|
mean value: 0.6169113652737336
|
|
|
|
key: test_accuracy
|
|
value: [0.61538462 0.76923077 0.66666667 0.58333333 0.75 0.41666667
|
|
0.66666667 0.5 0.75 0.66666667]
|
|
|
|
mean value: 0.6384615384615384
|
|
|
|
key: train_accuracy
|
|
value: [0.77981651 0.79816514 0.84545455 0.87272727 0.80909091 0.81818182
|
|
0.74545455 0.8 0.82727273 0.78181818]
|
|
|
|
mean value: 0.8077981651376147
|
|
|
|
key: test_fscore
|
|
value: [0.54545455 0.8 0.66666667 0.54545455 0.76923077 0.22222222
|
|
0.6 0.4 0.8 0.75 ]
|
|
|
|
mean value: 0.6099028749028749
|
|
|
|
key: train_fscore
|
|
value: [0.78181818 0.80357143 0.85217391 0.87272727 0.8173913 0.82758621
|
|
0.74545455 0.79245283 0.82882883 0.78571429]
|
|
|
|
mean value: 0.8107718797591079
|
|
|
|
key: test_precision
|
|
value: [0.6 0.75 0.66666667 0.6 0.71428571 0.33333333
|
|
0.75 0.5 0.66666667 0.6 ]
|
|
|
|
mean value: 0.618095238095238
|
|
|
|
key: train_precision
|
|
value: [0.78181818 0.77586207 0.81666667 0.87272727 0.78333333 0.78688525
|
|
0.74545455 0.82352941 0.82142857 0.77192982]
|
|
|
|
mean value: 0.7979635122621838
|
|
|
|
key: test_recall
|
|
value: [0.5 0.85714286 0.66666667 0.5 0.83333333 0.16666667
|
|
0.5 0.33333333 1. 1. ]
|
|
|
|
mean value: 0.6357142857142857
|
|
|
|
key: train_recall
|
|
value: [0.78181818 0.83333333 0.89090909 0.87272727 0.85454545 0.87272727
|
|
0.74545455 0.76363636 0.83636364 0.8 ]
|
|
|
|
mean value: 0.8251515151515152
|
|
|
|
key: test_roc_auc
|
|
value: [0.60714286 0.76190476 0.66666667 0.58333333 0.75 0.41666667
|
|
0.66666667 0.5 0.75 0.66666667]
|
|
|
|
mean value: 0.636904761904762
|
|
|
|
key: train_roc_auc
|
|
value: [0.77979798 0.79848485 0.84545455 0.87272727 0.80909091 0.81818182
|
|
0.74545455 0.8 0.82727273 0.78181818]
|
|
|
|
mean value: 0.8078282828282829
|
|
|
|
key: test_jcc
|
|
value: [0.375 0.66666667 0.5 0.375 0.625 0.125
|
|
0.42857143 0.25 0.66666667 0.6 ]
|
|
|
|
mean value: 0.46119047619047615
|
|
|
|
key: train_jcc
|
|
value: [0.64179104 0.67164179 0.74242424 0.77419355 0.69117647 0.70588235
|
|
0.5942029 0.65625 0.70769231 0.64705882]
|
|
|
|
mean value: 0.6832313479934091
|
|
|
|
MCC on Blind test: -0.02
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00934935 0.00933266 0.00925279 0.00924444 0.00926852 0.0090816
|
|
0.0083015 0.00961733 0.00941873 0.00917292]
|
|
|
|
mean value: 0.00920398235321045
|
|
|
|
key: score_time
|
|
value: [0.01055717 0.01017785 0.01014447 0.01015496 0.01021862 0.00954366
|
|
0.00957441 0.01016331 0.01006627 0.01016855]
|
|
|
|
mean value: 0.01007692813873291
|
|
|
|
key: test_mcc
|
|
value: [0.38095238 0.38575837 0.4472136 0. 0.16903085 0.66666667
|
|
0.35355339 0.16903085 0.57735027 0.50709255]
|
|
|
|
mean value: 0.3656648932535651
|
|
|
|
key: train_mcc
|
|
value: [0.63650308 0.68179009 0.60644963 0.65991202 0.69378191 0.62325024
|
|
0.60644963 0.6965738 0.72111026 0.65552134]
|
|
|
|
mean value: 0.6581341987834335
|
|
|
|
key: test_accuracy
|
|
value: [0.69230769 0.69230769 0.66666667 0.5 0.58333333 0.83333333
|
|
0.66666667 0.58333333 0.75 0.75 ]
|
|
|
|
mean value: 0.6717948717948719
|
|
|
|
key: train_accuracy
|
|
value: [0.81651376 0.83486239 0.8 0.82727273 0.84545455 0.80909091
|
|
0.8 0.84545455 0.85454545 0.82727273]
|
|
|
|
mean value: 0.82604670558799
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.75 0.75 0.625 0.61538462 0.83333333
|
|
0.6 0.61538462 0.8 0.72727273]
|
|
|
|
mean value: 0.6983041958041958
|
|
|
|
key: train_fscore
|
|
value: [0.82758621 0.84745763 0.81355932 0.83760684 0.85217391 0.82051282
|
|
0.81355932 0.85470085 0.86666667 0.83185841]
|
|
|
|
mean value: 0.8365681977693296
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.66666667 0.6 0.5 0.57142857 0.83333333
|
|
0.75 0.57142857 0.66666667 0.8 ]
|
|
|
|
mean value: 0.6626190476190477
|
|
|
|
key: train_precision
|
|
value: [0.78688525 0.78125 0.76190476 0.79032258 0.81666667 0.77419355
|
|
0.76190476 0.80645161 0.8 0.81034483]
|
|
|
|
mean value: 0.788992400589952
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.85714286 1. 0.83333333 0.66666667 0.83333333
|
|
0.5 0.66666667 1. 0.66666667]
|
|
|
|
mean value: 0.769047619047619
|
|
|
|
key: train_recall
|
|
value: [0.87272727 0.92592593 0.87272727 0.89090909 0.89090909 0.87272727
|
|
0.87272727 0.90909091 0.94545455 0.85454545]
|
|
|
|
mean value: 0.8907744107744108
|
|
|
|
key: test_roc_auc
|
|
value: [0.69047619 0.67857143 0.66666667 0.5 0.58333333 0.83333333
|
|
0.66666667 0.58333333 0.75 0.75 ]
|
|
|
|
mean value: 0.6702380952380952
|
|
|
|
key: train_roc_auc
|
|
value: [0.81599327 0.83569024 0.8 0.82727273 0.84545455 0.80909091
|
|
0.8 0.84545455 0.85454545 0.82727273]
|
|
|
|
mean value: 0.8260774410774411
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.6 0.6 0.45454545 0.44444444 0.71428571
|
|
0.42857143 0.44444444 0.66666667 0.57142857]
|
|
|
|
mean value: 0.5424386724386724
|
|
|
|
key: train_jcc
|
|
value: [0.70588235 0.73529412 0.68571429 0.72058824 0.74242424 0.69565217
|
|
0.68571429 0.74626866 0.76470588 0.71212121]
|
|
|
|
mean value: 0.7194365444838782
|
|
|
|
MCC on Blind test: -0.02
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01035452 0.00974393 0.0103569 0.01026082 0.00954938 0.00953484
|
|
0.00942683 0.00921464 0.00926161 0.00937915]
|
|
|
|
mean value: 0.009708261489868164
|
|
|
|
key: score_time
|
|
value: [0.00965738 0.00919104 0.0093267 0.00899458 0.00878954 0.00891089
|
|
0.0089469 0.00857568 0.00862193 0.00856423]
|
|
|
|
mean value: 0.008957886695861816
|
|
|
|
key: test_mcc
|
|
value: [0.38575837 0.59160798 0.57735027 0.57735027 0.66666667 0.70710678
|
|
0.84515425 0. 0.57735027 0.50709255]
|
|
|
|
mean value: 0.543543741620291
|
|
|
|
key: train_mcc
|
|
value: [0.85372474 0.92724828 0.87635609 0.85454545 0.89149871 0.83650191
|
|
0.89149871 0.80119274 0.92973479 0.87287156]
|
|
|
|
mean value: 0.873517298924518
|
|
|
|
key: test_accuracy
|
|
value: [0.69230769 0.76923077 0.75 0.75 0.83333333 0.83333333
|
|
0.91666667 0.5 0.75 0.75 ]
|
|
|
|
mean value: 0.7544871794871795
|
|
|
|
key: train_accuracy
|
|
value: [0.9266055 0.96330275 0.93636364 0.92727273 0.94545455 0.91818182
|
|
0.94545455 0.9 0.96363636 0.93636364]
|
|
|
|
mean value: 0.9362635529608007
|
|
|
|
key: test_fscore
|
|
value: [0.6 0.82352941 0.8 0.66666667 0.83333333 0.8
|
|
0.90909091 0.5 0.8 0.76923077]
|
|
|
|
mean value: 0.7501851090086384
|
|
|
|
key: train_fscore
|
|
value: [0.92857143 0.96363636 0.93913043 0.92727273 0.94642857 0.91891892
|
|
0.94642857 0.89719626 0.96491228 0.93693694]
|
|
|
|
mean value: 0.9369432495360124
|
|
|
|
key: test_precision
|
|
value: [0.75 0.7 0.66666667 1. 0.83333333 1.
|
|
1. 0.5 0.66666667 0.71428571]
|
|
|
|
mean value: 0.7830952380952381
|
|
|
|
key: train_precision
|
|
value: [0.9122807 0.94642857 0.9 0.92727273 0.92982456 0.91071429
|
|
0.92982456 0.92307692 0.93220339 0.92857143]
|
|
|
|
mean value: 0.9240197150455848
|
|
|
|
key: test_recall
|
|
value: [0.5 1. 1. 0.5 0.83333333 0.66666667
|
|
0.83333333 0.5 1. 0.83333333]
|
|
|
|
mean value: 0.7666666666666667
|
|
|
|
key: train_recall
|
|
value: [0.94545455 0.98148148 0.98181818 0.92727273 0.96363636 0.92727273
|
|
0.96363636 0.87272727 1. 0.94545455]
|
|
|
|
mean value: 0.9508754208754209
|
|
|
|
key: test_roc_auc
|
|
value: [0.67857143 0.75 0.75 0.75 0.83333333 0.83333333
|
|
0.91666667 0.5 0.75 0.75 ]
|
|
|
|
mean value: 0.7511904761904762
|
|
|
|
key: train_roc_auc
|
|
value: [0.92643098 0.96346801 0.93636364 0.92727273 0.94545455 0.91818182
|
|
0.94545455 0.9 0.96363636 0.93636364]
|
|
|
|
mean value: 0.9362626262626262
|
|
|
|
key: test_jcc
|
|
value: [0.42857143 0.7 0.66666667 0.5 0.71428571 0.66666667
|
|
0.83333333 0.33333333 0.66666667 0.625 ]
|
|
|
|
mean value: 0.613452380952381
|
|
|
|
key: train_jcc
|
|
value: [0.86666667 0.92982456 0.8852459 0.86440678 0.89830508 0.85
|
|
0.89830508 0.81355932 0.93220339 0.88135593]
|
|
|
|
mean value: 0.8819872722929859
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.58637333 0.45178008 0.48769093 0.53960443 0.72002649 0.48438072
|
|
0.47155118 0.46499372 0.60206747 0.50374007]
|
|
|
|
mean value: 0.5312208414077759
|
|
|
|
key: score_time
|
|
value: [0.01212716 0.01206875 0.01218867 0.01216054 0.01211762 0.01208639
|
|
0.01207018 0.0120542 0.01207995 0.01216769]
|
|
|
|
mean value: 0.012112116813659668
|
|
|
|
key: test_mcc
|
|
value: [ 1. 0.7200823 0.70710678 -0.16903085 0.35355339 0.84515425
|
|
0.70710678 0.84515425 0.4472136 0.84515425]
|
|
|
|
mean value: 0.6301494761529269
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.84615385 0.83333333 0.41666667 0.66666667 0.91666667
|
|
0.83333333 0.91666667 0.66666667 0.91666667]
|
|
|
|
mean value: 0.8012820512820513
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.875 0.85714286 0.46153846 0.71428571 0.90909091
|
|
0.85714286 0.92307692 0.75 0.92307692]
|
|
|
|
mean value: 0.8270354645354645
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.77777778 0.75 0.42857143 0.625 1.
|
|
0.75 0.85714286 0.6 0.85714286]
|
|
|
|
mean value: 0.7645634920634921
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.5 0.83333333 0.83333333
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9166666666666666
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.83333333 0.83333333 0.41666667 0.66666667 0.91666667
|
|
0.83333333 0.91666667 0.66666667 0.91666667]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.77777778 0.75 0.3 0.55555556 0.83333333
|
|
0.75 0.85714286 0.6 0.85714286]
|
|
|
|
mean value: 0.7280952380952381
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01720142 0.01370883 0.01067781 0.0106144 0.01026344 0.01019049
|
|
0.01044559 0.0101335 0.01021338 0.01029158]
|
|
|
|
mean value: 0.01137404441833496
|
|
|
|
key: score_time
|
|
value: [0.01184368 0.00893283 0.00880504 0.00879431 0.00844836 0.00852084
|
|
0.00842619 0.0083847 0.00850201 0.00854063]
|
|
|
|
mean value: 0.008919858932495117
|
|
|
|
key: test_mcc
|
|
value: [1. 1. 1. 0.70710678 1. 0.66666667
|
|
0.84515425 0.84515425 0.84515425 0.66666667]
|
|
|
|
mean value: 0.857590287870543
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 1. 1. 0.83333333 1. 0.83333333
|
|
0.91666667 0.91666667 0.91666667 0.83333333]
|
|
|
|
mean value: 0.925
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 1. 1. 0.85714286 1. 0.83333333
|
|
0.92307692 0.92307692 0.92307692 0.83333333]
|
|
|
|
mean value: 0.9293040293040293
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 0.75 1. 0.83333333
|
|
0.85714286 0.85714286 0.85714286 0.83333333]
|
|
|
|
mean value: 0.8988095238095238
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 0.83333333
|
|
1. 1. 1. 0.83333333]
|
|
|
|
mean value: 0.9666666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 1. 1. 0.83333333 1. 0.83333333
|
|
0.91666667 0.91666667 0.91666667 0.83333333]
|
|
|
|
mean value: 0.925
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 1. 1. 0.75 1. 0.71428571
|
|
0.85714286 0.85714286 0.85714286 0.71428571]
|
|
|
|
mean value: 0.875
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.74
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08388996 0.08347058 0.08531809 0.08382106 0.08408999 0.08868432
|
|
0.08518457 0.08340192 0.0836916 0.0850246 ]
|
|
|
|
mean value: 0.08465766906738281
|
|
|
|
key: score_time
|
|
value: [0.01736975 0.01734281 0.01751995 0.01697803 0.01842737 0.0175755
|
|
0.01707554 0.01700139 0.01717949 0.01717973]
|
|
|
|
mean value: 0.01736495494842529
|
|
|
|
key: test_mcc
|
|
value: [0.7200823 0.85391256 0.84515425 0.35355339 1. 0.84515425
|
|
0.66666667 0.35355339 0.84515425 1. ]
|
|
|
|
mean value: 0.7483231075691826
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.84615385 0.92307692 0.91666667 0.66666667 1. 0.91666667
|
|
0.83333333 0.66666667 0.91666667 1. ]
|
|
|
|
mean value: 0.8685897435897436
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.93333333 0.92307692 0.6 1. 0.90909091
|
|
0.83333333 0.71428571 0.92307692 1. ]
|
|
|
|
mean value: 0.8636197136197136
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.875 0.85714286 0.75 1. 1.
|
|
0.83333333 0.625 0.85714286 1. ]
|
|
|
|
mean value: 0.8797619047619047
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 1. 1. 0.5 1. 0.83333333
|
|
0.83333333 0.83333333 1. 1. ]
|
|
|
|
mean value: 0.8666666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.91666667 0.91666667 0.66666667 1. 0.91666667
|
|
0.83333333 0.66666667 0.91666667 1. ]
|
|
|
|
mean value: 0.8666666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.875 0.85714286 0.42857143 1. 0.83333333
|
|
0.71428571 0.55555556 0.85714286 1. ]
|
|
|
|
mean value: 0.7787698412698413
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00898266 0.00858808 0.00851655 0.00858903 0.00864077 0.00948691
|
|
0.00847912 0.00863051 0.0087626 0.00869036]
|
|
|
|
mean value: 0.008736658096313476
|
|
|
|
key: score_time
|
|
value: [0.00832868 0.00833035 0.0084331 0.00838184 0.0083549 0.00862789
|
|
0.008394 0.0083952 0.00895429 0.00837755]
|
|
|
|
mean value: 0.008457779884338379
|
|
|
|
key: test_mcc
|
|
value: [0.38095238 0.7200823 0.57735027 0.70710678 0.57735027 0.84515425
|
|
0.19245009 0.50709255 0.70710678 1. ]
|
|
|
|
mean value: 0.6214645678823325
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.69230769 0.84615385 0.75 0.83333333 0.75 0.91666667
|
|
0.58333333 0.75 0.83333333 1. ]
|
|
|
|
mean value: 0.7955128205128206
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.875 0.8 0.85714286 0.8 0.90909091
|
|
0.66666667 0.76923077 0.85714286 1. ]
|
|
|
|
mean value: 0.8200940725940726
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.77777778 0.66666667 0.75 0.66666667 1.
|
|
0.55555556 0.71428571 0.75 1. ]
|
|
|
|
mean value: 0.7547619047619047
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 1. 1. 1. 1. 0.83333333
|
|
0.83333333 0.83333333 1. 1. ]
|
|
|
|
mean value: 0.9166666666666666
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.69047619 0.83333333 0.75 0.83333333 0.75 0.91666667
|
|
0.58333333 0.75 0.83333333 1. ]
|
|
|
|
mean value: 0.7940476190476191
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.77777778 0.66666667 0.75 0.66666667 0.83333333
|
|
0.5 0.625 0.75 1. ]
|
|
|
|
mean value: 0.7069444444444445
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.07
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.08678007 1.09820795 1.08755231 1.05314875 1.06180525 1.14238811
|
|
1.17452717 1.0840826 1.10524988 1.14451408]
|
|
|
|
mean value: 1.103825616836548
|
|
|
|
key: score_time
|
|
value: [0.09358168 0.09317207 0.0871892 0.08708262 0.0869174 0.09442878
|
|
0.08805656 0.09175134 0.09073496 0.09464574]
|
|
|
|
mean value: 0.09075603485107422
|
|
|
|
key: test_mcc
|
|
value: [1. 0.85391256 0.84515425 0.50709255 0.84515425 0.84515425
|
|
0.66666667 0.70710678 0.70710678 1. ]
|
|
|
|
mean value: 0.7977348109892388
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.92307692 0.91666667 0.75 0.91666667 0.91666667
|
|
0.83333333 0.83333333 0.83333333 1. ]
|
|
|
|
mean value: 0.8923076923076924
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.93333333 0.92307692 0.72727273 0.92307692 0.90909091
|
|
0.83333333 0.85714286 0.85714286 1. ]
|
|
|
|
mean value: 0.8963469863469863
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.875 0.85714286 0.8 0.85714286 1.
|
|
0.83333333 0.75 0.75 1. ]
|
|
|
|
mean value: 0.8722619047619048
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.66666667 1. 0.83333333
|
|
0.83333333 1. 1. 1. ]
|
|
|
|
mean value: 0.9333333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.91666667 0.91666667 0.75 0.91666667 0.91666667
|
|
0.83333333 0.83333333 0.83333333 1. ]
|
|
|
|
mean value: 0.8916666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.875 0.85714286 0.57142857 0.85714286 0.83333333
|
|
0.71428571 0.75 0.75 1. ]
|
|
|
|
mean value: 0.8208333333333333
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.84092927 0.86534286 0.87874079 0.89231968 0.94122863 0.90753007
|
|
0.87507176 0.86712623 0.88096118 0.87824702]
|
|
|
|
mean value: 0.8827497482299804
|
|
|
|
key: score_time
|
|
value: [0.21841073 0.10577917 0.20769811 0.22409463 0.21173596 0.19788694
|
|
0.1364646 0.233778 0.180516 0.2113955 ]
|
|
|
|
mean value: 0.19277596473693848
|
|
|
|
key: test_mcc
|
|
value: [0.7200823 0.7200823 0.84515425 0.50709255 0.70710678 0.84515425
|
|
0.66666667 0.70710678 0.70710678 1. ]
|
|
|
|
mean value: 0.7425552672166643
|
|
|
|
key: train_mcc
|
|
value: [0.96329966 1. 0.96363636 0.98198051 0.98198051 1.
|
|
0.98198051 1. 1. 1. ]
|
|
|
|
mean value: 0.9872877545121924
|
|
|
|
key: test_accuracy
|
|
value: [0.84615385 0.84615385 0.91666667 0.75 0.83333333 0.91666667
|
|
0.83333333 0.83333333 0.83333333 1. ]
|
|
|
|
mean value: 0.860897435897436
|
|
|
|
key: train_accuracy
|
|
value: [0.98165138 1. 0.98181818 0.99090909 0.99090909 1.
|
|
0.99090909 1. 1. 1. ]
|
|
|
|
mean value: 0.9936196830692243
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.875 0.92307692 0.72727273 0.85714286 0.90909091
|
|
0.83333333 0.85714286 0.85714286 1. ]
|
|
|
|
mean value: 0.8639202464202465
|
|
|
|
key: train_fscore
|
|
value: [0.98181818 1. 0.98181818 0.99082569 0.99082569 1.
|
|
0.99082569 1. 1. 1. ]
|
|
|
|
mean value: 0.9936113427856547
|
|
|
|
key: test_precision
|
|
value: [1. 0.77777778 0.85714286 0.8 0.75 1.
|
|
0.83333333 0.75 0.75 1. ]
|
|
|
|
mean value: 0.8518253968253968
|
|
|
|
key: train_precision
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[0.98181818 1. 0.98181818 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9963636363636363
|
|
|
|
key: test_recall
|
|
value: [0.66666667 1. 1. 0.66666667 1. 0.83333333
|
|
0.83333333 1. 1. 1. ]
|
|
|
|
mean value: 0.9
|
|
|
|
key: train_recall
|
|
value: [0.98181818 1. 0.98181818 0.98181818 0.98181818 1.
|
|
0.98181818 1. 1. 1. ]
|
|
|
|
mean value: 0.990909090909091
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.83333333 0.91666667 0.75 0.83333333 0.91666667
|
|
0.83333333 0.83333333 0.83333333 1. ]
|
|
|
|
mean value: 0.8583333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.98164983 1. 0.98181818 0.99090909 0.99090909 1.
|
|
0.99090909 1. 1. 1. ]
|
|
|
|
mean value: 0.9936195286195286
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.77777778 0.85714286 0.57142857 0.75 0.83333333
|
|
0.71428571 0.75 0.75 1. ]
|
|
|
|
mean value: 0.767063492063492
|
|
|
|
key: train_jcc
|
|
value: [0.96428571 1. 0.96428571 0.98181818 0.98181818 1.
|
|
0.98181818 1. 1. 1. ]
|
|
|
|
mean value: 0.9874025974025974
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02143669 0.00880098 0.00866723 0.00889945 0.00946045 0.00943971
|
|
0.00975537 0.00913382 0.00887299 0.00883484]
|
|
|
|
mean value: 0.01033015251159668
|
|
|
|
key: score_time
|
|
value: [0.01436877 0.00856876 0.00937319 0.0088954 0.00896883 0.00845623
|
|
0.00843525 0.00852799 0.00846863 0.00904155]
|
|
|
|
mean value: 0.009310460090637207
|
|
|
|
key: test_mcc
|
|
value: [ 0.21957752 0.53674504 0.33333333 0.16903085 0.50709255 -0.19245009
|
|
0.35355339 0. 0.57735027 0.4472136 ]
|
|
|
|
mean value: 0.2951446459204242
|
|
|
|
key: train_mcc
|
|
value: [0.55959596 0.59817936 0.69378191 0.74545455 0.62075223 0.6401844
|
|
0.49090909 0.6015931 0.65465367 0.56400939]
|
|
|
|
mean value: 0.6169113652737336
|
|
|
|
key: test_accuracy
|
|
value: [0.61538462 0.76923077 0.66666667 0.58333333 0.75 0.41666667
|
|
0.66666667 0.5 0.75 0.66666667]
|
|
|
|
mean value: 0.6384615384615384
|
|
|
|
key: train_accuracy
|
|
value: [0.77981651 0.79816514 0.84545455 0.87272727 0.80909091 0.81818182
|
|
0.74545455 0.8 0.82727273 0.78181818]
|
|
|
|
mean value: 0.8077981651376147
|
|
|
|
key: test_fscore
|
|
value: [0.54545455 0.8 0.66666667 0.54545455 0.76923077 0.22222222
|
|
0.6 0.4 0.8 0.75 ]
|
|
|
|
mean value: 0.6099028749028749
|
|
|
|
key: train_fscore
|
|
value: [0.78181818 0.80357143 0.85217391 0.87272727 0.8173913 0.82758621
|
|
0.74545455 0.79245283 0.82882883 0.78571429]
|
|
|
|
mean value: 0.8107718797591079
|
|
|
|
key: test_precision
|
|
value: [0.6 0.75 0.66666667 0.6 0.71428571 0.33333333
|
|
0.75 0.5 0.66666667 0.6 ]
|
|
|
|
mean value: 0.618095238095238
|
|
|
|
key: train_precision
|
|
value: [0.78181818 0.77586207 0.81666667 0.87272727 0.78333333 0.78688525
|
|
0.74545455 0.82352941 0.82142857 0.77192982]
|
|
|
|
mean value: 0.7979635122621838
|
|
|
|
key: test_recall
|
|
value: [0.5 0.85714286 0.66666667 0.5 0.83333333 0.16666667
|
|
0.5 0.33333333 1. 1. ]
|
|
|
|
mean value: 0.6357142857142857
|
|
|
|
key: train_recall
|
|
value: [0.78181818 0.83333333 0.89090909 0.87272727 0.85454545 0.87272727
|
|
0.74545455 0.76363636 0.83636364 0.8 ]
|
|
|
|
mean value: 0.8251515151515152
|
|
|
|
key: test_roc_auc
|
|
value: [0.60714286 0.76190476 0.66666667 0.58333333 0.75 0.41666667
|
|
0.66666667 0.5 0.75 0.66666667]
|
|
|
|
mean value: 0.636904761904762
|
|
|
|
key: train_roc_auc
|
|
value: [0.77979798 0.79848485 0.84545455 0.87272727 0.80909091 0.81818182
|
|
0.74545455 0.8 0.82727273 0.78181818]
|
|
|
|
mean value: 0.8078282828282829
|
|
|
|
key: test_jcc
|
|
value: [0.375 0.66666667 0.5 0.375 0.625 0.125
|
|
0.42857143 0.25 0.66666667 0.6 ]
|
|
|
|
mean value: 0.46119047619047615
|
|
|
|
key: train_jcc
|
|
value: [0.64179104 0.67164179 0.74242424 0.77419355 0.69117647 0.70588235
|
|
0.5942029 0.65625 0.70769231 0.64705882]
|
|
|
|
mean value: 0.6832313479934091
|
|
|
|
MCC on Blind test: -0.02
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.04586267 0.04303479 0.05527663 0.03797793 0.04534817 0.04201865
|
|
0.04289937 0.04235387 0.04213738 0.04771972]
|
|
|
|
mean value: 0.04446291923522949
|
|
|
|
key: score_time
|
|
value: [0.01033163 0.01053166 0.01022077 0.01040626 0.01054192 0.01100063
|
|
0.01099896 0.0111537 0.01092267 0.01132393]
|
|
|
|
mean value: 0.010743212699890137
|
|
|
|
key: test_mcc
|
|
value: [1. 0.85391256 1. 0.84515425 1. 0.84515425
|
|
0.84515425 1. 0.84515425 0.66666667]
|
|
|
|
mean value: 0.89011962494107
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.92307692 1. 0.91666667 1. 0.91666667
|
|
0.91666667 1. 0.91666667 0.83333333]
|
|
|
|
mean value: 0.9423076923076923
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.93333333 1. 0.92307692 1. 0.90909091
|
|
0.92307692 1. 0.92307692 0.83333333]
|
|
|
|
mean value: 0.9444988344988345
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.875 1. 0.85714286 1. 1.
|
|
0.85714286 1. 0.85714286 0.83333333]
|
|
|
|
mean value: 0.9279761904761905
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 0.83333333
|
|
1. 1. 1. 0.83333333]
|
|
|
|
mean value: 0.9666666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.91666667 1. 0.91666667 1. 0.91666667
|
|
0.91666667 1. 0.91666667 0.83333333]
|
|
|
|
mean value: 0.9416666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.875 1. 0.85714286 1. 0.83333333
|
|
0.85714286 1. 0.85714286 0.71428571]
|
|
|
|
mean value: 0.8994047619047619
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 1.0
|
|
|
|
Accuracy on Blind test: 1.0
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02802229 0.05440402 0.03097749 0.02020931 0.03203607 0.05401206
|
|
0.05476737 0.0599885 0.0434413 0.04172421]
|
|
|
|
mean value: 0.04195826053619385
|
|
|
|
key: score_time
|
|
value: [0.0214808 0.02377057 0.01164842 0.01180601 0.0315659 0.0226531
|
|
0.02350903 0.01469707 0.02243733 0.02210069]
|
|
|
|
mean value: 0.020566892623901368
|
|
|
|
key: test_mcc
|
|
value: [0.09759001 0.85391256 0.84515425 0.16903085 0.30151134 0.84515425
|
|
0.66666667 0.70710678 0.50709255 0.84515425]
|
|
|
|
mean value: 0.583837353152416
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.53846154 0.92307692 0.91666667 0.58333333 0.58333333 0.91666667
|
|
0.83333333 0.83333333 0.75 0.91666667]
|
|
|
|
mean value: 0.7794871794871795
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 0.93333333 0.92307692 0.54545455 0.70588235 0.90909091
|
|
0.83333333 0.85714286 0.76923077 0.92307692]
|
|
|
|
mean value: 0.7971050518109342
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5 0.875 0.85714286 0.6 0.54545455 1.
|
|
0.83333333 0.75 0.71428571 0.85714286]
|
|
|
|
mean value: 0.7532359307359308
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 1. 1. 0.5 1. 0.83333333
|
|
0.83333333 1. 0.83333333 1. ]
|
|
|
|
mean value: 0.8666666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.54761905 0.91666667 0.91666667 0.58333333 0.58333333 0.91666667
|
|
0.83333333 0.83333333 0.75 0.91666667]
|
|
|
|
mean value: 0.7797619047619049
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.4 0.875 0.85714286 0.375 0.54545455 0.83333333
|
|
0.71428571 0.75 0.625 0.85714286]
|
|
|
|
mean value: 0.6832359307359307
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.18
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02012157 0.00975323 0.00962758 0.00935221 0.00946927 0.00961137
|
|
0.00936961 0.0094831 0.00953078 0.00941968]
|
|
|
|
mean value: 0.010573840141296387
|
|
|
|
key: score_time
|
|
value: [0.00972843 0.00950956 0.0092051 0.00918698 0.00915432 0.00908542
|
|
0.00867295 0.0086298 0.00913239 0.00918269]
|
|
|
|
mean value: 0.009148764610290527
|
|
|
|
key: test_mcc
|
|
value: [ 0.05143445 -0.09759001 0.57735027 -0.35355339 -0.19245009 0.57735027
|
|
0.16903085 -0.33333333 0.30151134 0.33333333]
|
|
|
|
mean value: 0.103308369627208
|
|
|
|
key: train_mcc
|
|
value: [0.45108055 0.40161889 0.50917508 0.40006613 0.32732684 0.50917508
|
|
0.49385654 0.43643578 0.50917508 0.47280543]
|
|
|
|
mean value: 0.4510715380447419
|
|
|
|
key: test_accuracy
|
|
value: [0.53846154 0.46153846 0.75 0.33333333 0.41666667 0.75
|
|
0.58333333 0.33333333 0.58333333 0.66666667]
|
|
|
|
mean value: 0.5416666666666666
|
|
|
|
key: train_accuracy
|
|
value: [0.72477064 0.69724771 0.75454545 0.7 0.66363636 0.75454545
|
|
0.74545455 0.71818182 0.75454545 0.73636364]
|
|
|
|
mean value: 0.724929107589658
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.53333333 0.8 0.2 0.53333333 0.66666667
|
|
0.54545455 0.33333333 0.70588235 0.66666667]
|
|
|
|
mean value: 0.5384670231729055
|
|
|
|
key: train_fscore
|
|
value: [0.71698113 0.65979381 0.75675676 0.69724771 0.66055046 0.75229358
|
|
0.73076923 0.72072072 0.75229358 0.73394495]
|
|
|
|
mean value: 0.7181351929984527
|
|
|
|
key: test_precision
|
|
value: [0.5 0.5 0.66666667 0.25 0.44444444 1.
|
|
0.6 0.33333333 0.54545455 0.66666667]
|
|
|
|
mean value: 0.5506565656565656
|
|
|
|
key: train_precision
|
|
value: [0.74509804 0.74418605 0.75 0.7037037 0.66666667 0.75925926
|
|
0.7755102 0.71428571 0.75925926 0.74074074]
|
|
|
|
mean value: 0.7358709633724291
|
|
|
|
key: test_recall
|
|
value: [0.33333333 0.57142857 1. 0.16666667 0.66666667 0.5
|
|
0.5 0.33333333 1. 0.66666667]
|
|
|
|
mean value: 0.5738095238095238
|
|
|
|
key: train_recall
|
|
value: [0.69090909 0.59259259 0.76363636 0.69090909 0.65454545 0.74545455
|
|
0.69090909 0.72727273 0.74545455 0.72727273]
|
|
|
|
mean value: 0.7028956228956229
|
|
|
|
key: test_roc_auc
|
|
value: [0.52380952 0.45238095 0.75 0.33333333 0.41666667 0.75
|
|
0.58333333 0.33333333 0.58333333 0.66666667]
|
|
|
|
mean value: 0.5392857142857143
|
|
|
|
key: train_roc_auc
|
|
value: [0.72508418 0.6962963 0.75454545 0.7 0.66363636 0.75454545
|
|
0.74545455 0.71818182 0.75454545 0.73636364]
|
|
|
|
mean value: 0.7248653198653199
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.36363636 0.66666667 0.11111111 0.36363636 0.5
|
|
0.375 0.2 0.54545455 0.5 ]
|
|
|
|
mean value: 0.38755050505050503
|
|
|
|
key: train_jcc
|
|
value: [0.55882353 0.49230769 0.60869565 0.53521127 0.49315068 0.60294118
|
|
0.57575758 0.56338028 0.60294118 0.57971014]
|
|
|
|
mean value: 0.561291918174694
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01111698 0.01457047 0.01421618 0.01366043 0.01515484 0.01377654
|
|
0.01379347 0.01454496 0.0135591 0.01401854]
|
|
|
|
mean value: 0.01384115219116211
|
|
|
|
key: score_time
|
|
value: [0.00916433 0.01147294 0.01146817 0.01146865 0.0114131 0.01145601
|
|
0.01140928 0.01143718 0.01140952 0.01148391]
|
|
|
|
mean value: 0.01121830940246582
|
|
|
|
key: test_mcc
|
|
value: [0.85714286 0.7200823 0.70710678 0.4472136 0.66666667 0.84515425
|
|
0.57735027 0.84515425 0.57735027 0.70710678]
|
|
|
|
mean value: 0.6950328029341957
|
|
|
|
key: train_mcc
|
|
value: [0.94635821 0.98181818 0.94686415 0.77084557 0.92973479 0.89625816
|
|
0.75592895 0.94561086 0.94561086 0.91287093]
|
|
|
|
mean value: 0.9031900662843981
|
|
|
|
key: test_accuracy
|
|
value: [0.92307692 0.84615385 0.83333333 0.66666667 0.83333333 0.91666667
|
|
0.75 0.91666667 0.75 0.83333333]
|
|
|
|
mean value: 0.8269230769230769
|
|
|
|
key: train_accuracy
|
|
value: [0.97247706 0.99082569 0.97272727 0.87272727 0.96363636 0.94545455
|
|
0.86363636 0.97272727 0.97272727 0.95454545]
|
|
|
|
mean value: 0.9481484570475396
|
|
|
|
key: test_fscore
|
|
value: [0.92307692 0.875 0.85714286 0.5 0.83333333 0.90909091
|
|
0.8 0.92307692 0.8 0.8 ]
|
|
|
|
mean value: 0.8220720945720946
|
|
|
|
key: train_fscore
|
|
value: [0.97345133 0.99082569 0.97345133 0.85416667 0.96226415 0.94827586
|
|
0.88 0.97297297 0.97297297 0.95238095]
|
|
|
|
mean value: 0.9480761920946578
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.77777778 0.75 1. 0.83333333 1.
|
|
0.66666667 0.85714286 0.66666667 1. ]
|
|
|
|
mean value: 0.8408730158730159
|
|
|
|
key: train_precision
|
|
value: [0.94827586 0.98181818 0.94827586 1. 1. 0.90163934
|
|
0.78571429 0.96428571 0.96428571 1. ]
|
|
|
|
mean value: 0.9494294964504122
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.33333333 0.83333333 0.83333333
|
|
1. 1. 1. 0.66666667]
|
|
|
|
mean value: 0.8666666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 0.74545455 0.92727273 1.
|
|
1. 0.98181818 0.98181818 0.90909091]
|
|
|
|
mean value: 0.9545454545454546
|
|
|
|
key: test_roc_auc
|
|
value: [0.92857143 0.83333333 0.83333333 0.66666667 0.83333333 0.91666667
|
|
0.75 0.91666667 0.75 0.83333333]
|
|
|
|
mean value: 0.8261904761904763
|
|
|
|
key: train_roc_auc
|
|
value: [0.97222222 0.99090909 0.97272727 0.87272727 0.96363636 0.94545455
|
|
0.86363636 0.97272727 0.97272727 0.95454545]
|
|
|
|
mean value: 0.9481313131313132
|
|
|
|
key: test_jcc
|
|
value: [0.85714286 0.77777778 0.75 0.33333333 0.71428571 0.83333333
|
|
0.66666667 0.85714286 0.66666667 0.66666667]
|
|
|
|
mean value: 0.7123015873015873
|
|
|
|
key: train_jcc
|
|
value: [0.94827586 0.98181818 0.94827586 0.74545455 0.92727273 0.90163934
|
|
0.78571429 0.94736842 0.94736842 0.90909091]
|
|
|
|
mean value: 0.9042278559856138
|
|
|
|
MCC on Blind test: 0.18
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01410699 0.01302385 0.01327181 0.01243186 0.01311707 0.01251316
|
|
0.01321793 0.01300478 0.01298833 0.012604 ]
|
|
|
|
mean value: 0.01302797794342041
|
|
|
|
key: score_time
|
|
value: [0.01087165 0.01139569 0.0113461 0.01142955 0.01138949 0.01143646
|
|
0.01141524 0.01143265 0.01142383 0.01142192]
|
|
|
|
mean value: 0.011356258392333984
|
|
|
|
key: test_mcc
|
|
value: [1. 0.7200823 0.70710678 0.66666667 0.57735027 0.70710678
|
|
0.30151134 0.70710678 0.50709255 0.70710678]
|
|
|
|
mean value: 0.6601130257840452
|
|
|
|
key: train_mcc
|
|
value: [0.98181211 0.98181818 0.9104463 0.69102332 0.92973479 0.7793831
|
|
0.51416711 0.80111862 0.77084557 0.87402845]
|
|
|
|
mean value: 0.8234377558006962
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.84615385 0.83333333 0.83333333 0.75 0.83333333
|
|
0.58333333 0.83333333 0.75 0.83333333]
|
|
|
|
mean value: 0.8096153846153846
|
|
|
|
key: train_accuracy
|
|
value: [0.99082569 0.99082569 0.95454545 0.84545455 0.96363636 0.88181818
|
|
0.70909091 0.89090909 0.87272727 0.93636364]
|
|
|
|
mean value: 0.9036196830692244
|
|
|
|
key: test_fscore
|
|
value: [1. 0.875 0.85714286 0.83333333 0.8 0.8
|
|
0.28571429 0.85714286 0.72727273 0.8 ]
|
|
|
|
mean value: 0.7835606060606061
|
|
|
|
key: train_fscore
|
|
value: [0.99099099 0.99082569 0.95327103 0.8440367 0.96491228 0.86868687
|
|
0.58974359 0.90163934 0.85416667 0.93457944]
|
|
|
|
mean value: 0.8892852593662985
|
|
|
|
key: test_precision
|
|
value: [1. 0.77777778 0.75 0.83333333 0.66666667 1.
|
|
1. 0.75 0.8 1. ]
|
|
|
|
mean value: 0.8577777777777778
|
|
|
|
key: train_precision
|
|
value: [0.98214286 0.98181818 0.98076923 0.85185185 0.93220339 0.97727273
|
|
1. 0.82089552 1. 0.96153846]
|
|
|
|
mean value: 0.9488492222611878
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.83333333 1. 0.66666667
|
|
0.16666667 1. 0.66666667 0.66666667]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_recall
|
|
value: [1. 1. 0.92727273 0.83636364 1. 0.78181818
|
|
0.41818182 1. 0.74545455 0.90909091]
|
|
|
|
mean value: 0.8618181818181818
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.83333333 0.83333333 0.83333333 0.75 0.83333333
|
|
0.58333333 0.83333333 0.75 0.83333333]
|
|
|
|
mean value: 0.8083333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.99074074 0.99090909 0.95454545 0.84545455 0.96363636 0.88181818
|
|
0.70909091 0.89090909 0.87272727 0.93636364]
|
|
|
|
mean value: 0.9036195286195287
|
|
|
|
key: test_jcc
|
|
value: [1. 0.77777778 0.75 0.71428571 0.66666667 0.66666667
|
|
0.16666667 0.75 0.57142857 0.66666667]
|
|
|
|
mean value: 0.673015873015873
|
|
|
|
key: train_jcc
|
|
value: [0.98214286 0.98181818 0.91071429 0.73015873 0.93220339 0.76785714
|
|
0.41818182 0.82089552 0.74545455 0.87719298]
|
|
|
|
mean value: 0.816661945600227
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09890366 0.09237695 0.0930903 0.09278274 0.09307909 0.09162259
|
|
0.09250426 0.09289002 0.09270334 0.09173369]
|
|
|
|
mean value: 0.09316866397857666
|
|
|
|
key: score_time
|
|
value: [0.0157423 0.0158236 0.01600981 0.01580024 0.0157311 0.01570916
|
|
0.0157578 0.01616383 0.01525545 0.01590395]
|
|
|
|
mean value: 0.015789723396301268
|
|
|
|
key: test_mcc
|
|
value: [0.85714286 0.85391256 1. 0.84515425 0.84515425 0.84515425
|
|
0.84515425 1. 0.84515425 0.66666667]
|
|
|
|
mean value: 0.8603493361282073
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92307692 0.92307692 1. 0.91666667 0.91666667 0.91666667
|
|
0.91666667 1. 0.91666667 0.83333333]
|
|
|
|
mean value: 0.9262820512820513
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.92307692 0.93333333 1. 0.92307692 0.92307692 0.90909091
|
|
0.92307692 1. 0.92307692 0.83333333]
|
|
|
|
mean value: 0.9291142191142191
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.875 1. 0.85714286 0.85714286 1.
|
|
0.85714286 1. 0.85714286 0.83333333]
|
|
|
|
mean value: 0.8994047619047619
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 0.83333333
|
|
1. 1. 1. 0.83333333]
|
|
|
|
mean value: 0.9666666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.92857143 0.91666667 1. 0.91666667 0.91666667 0.91666667
|
|
0.91666667 1. 0.91666667 0.83333333]
|
|
|
|
mean value: 0.9261904761904762
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.85714286 0.875 1. 0.85714286 0.85714286 0.83333333
|
|
0.85714286 1. 0.85714286 0.71428571]
|
|
|
|
mean value: 0.8708333333333333
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03606534 0.03642392 0.02919197 0.05094767 0.0475266 0.03480506
|
|
0.05056953 0.03121591 0.02797985 0.04955316]
|
|
|
|
mean value: 0.039427900314331056
|
|
|
|
key: score_time
|
|
value: [0.02251363 0.03537154 0.02169418 0.02613282 0.01689553 0.02987766
|
|
0.023453 0.02329683 0.02489948 0.03886795]
|
|
|
|
mean value: 0.026300263404846192
|
|
|
|
key: test_mcc
|
|
value: [1. 0.85391256 1. 0.84515425 1. 0.84515425
|
|
0.66666667 1. 0.84515425 0.66666667]
|
|
|
|
mean value: 0.8722708661348849
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.98198051]
|
|
|
|
mean value: 0.9981980506061966
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.92307692 1. 0.91666667 1. 0.91666667
|
|
0.83333333 1. 0.91666667 0.83333333]
|
|
|
|
mean value: 0.933974358974359
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.99090909]
|
|
|
|
mean value: 0.9990909090909091
|
|
|
|
key: test_fscore
|
|
value: [1. 0.93333333 1. 0.92307692 1. 0.90909091
|
|
0.83333333 1. 0.92307692 0.83333333]
|
|
|
|
mean value: 0.9355244755244755
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.99082569]
|
|
|
|
mean value: 0.9990825688073395
|
|
|
|
key: test_precision
|
|
value: [1. 0.875 1. 0.85714286 1. 1.
|
|
0.83333333 1. 0.85714286 0.83333333]
|
|
|
|
mean value: 0.9255952380952381
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 0.83333333
|
|
0.83333333 1. 1. 0.83333333]
|
|
|
|
mean value: 0.95
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.98181818]
|
|
|
|
mean value: 0.9981818181818182
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.91666667 1. 0.91666667 1. 0.91666667
|
|
0.83333333 1. 0.91666667 0.83333333]
|
|
|
|
mean value: 0.9333333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.99090909]
|
|
|
|
mean value: 0.9990909090909091
|
|
|
|
key: test_jcc
|
|
value: [1. 0.875 1. 0.85714286 1. 0.83333333
|
|
0.71428571 1. 0.85714286 0.71428571]
|
|
|
|
mean value: 0.8851190476190476
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.98181818]
|
|
|
|
mean value: 0.9981818181818182
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03284764 0.04813981 0.03564596 0.04268336 0.04672694 0.04632139
|
|
0.03857088 0.04267526 0.04141998 0.04225588]
|
|
|
|
mean value: 0.04172871112823486
|
|
|
|
key: score_time
|
|
value: [0.02080584 0.02099371 0.01225829 0.02310228 0.02433825 0.02418017
|
|
0.02202249 0.02044415 0.02062988 0.02041173]
|
|
|
|
mean value: 0.020918679237365723
|
|
|
|
key: test_mcc
|
|
value: [ 0.53674504 0.85391256 0.57735027 -0.16903085 1. 0.84515425
|
|
0.66666667 0.35355339 0.57735027 0.70710678]
|
|
|
|
mean value: 0.5948808384560212
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.76923077 0.92307692 0.75 0.41666667 1. 0.91666667
|
|
0.83333333 0.66666667 0.75 0.83333333]
|
|
|
|
mean value: 0.7858974358974359
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.93333333 0.8 0.46153846 1. 0.90909091
|
|
0.83333333 0.71428571 0.8 0.85714286]
|
|
|
|
mean value: 0.8035997335997336
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.8 0.875 0.66666667 0.42857143 1. 1.
|
|
0.83333333 0.625 0.66666667 0.75 ]
|
|
|
|
mean value: 0.7645238095238095
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 1. 1. 0.5 1. 0.83333333
|
|
0.83333333 0.83333333 1. 1. ]
|
|
|
|
mean value: 0.8666666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.76190476 0.91666667 0.75 0.41666667 1. 0.91666667
|
|
0.83333333 0.66666667 0.75 0.83333333]
|
|
|
|
mean value: 0.7845238095238095
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.875 0.66666667 0.3 1. 0.83333333
|
|
0.71428571 0.55555556 0.66666667 0.75 ]
|
|
|
|
mean value: 0.6932936507936508
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.20533681 0.19399977 0.19668961 0.19811511 0.19290471 0.19788265
|
|
0.22293591 0.19819999 0.19810653 0.19056296]
|
|
|
|
mean value: 0.19947340488433837
|
|
|
|
key: score_time
|
|
value: [0.00951982 0.00908279 0.00915742 0.00930476 0.00922418 0.00977135
|
|
0.00901246 0.00978947 0.00929618 0.00982547]
|
|
|
|
mean value: 0.009398388862609863
|
|
|
|
key: test_mcc
|
|
value: [1. 1. 1. 0.84515425 1. 0.84515425
|
|
0.84515425 0.84515425 0.84515425 0.66666667]
|
|
|
|
mean value: 0.8892437940309249
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 1. 1. 0.91666667 1. 0.91666667
|
|
0.91666667 0.91666667 0.91666667 0.83333333]
|
|
|
|
mean value: 0.9416666666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 1. 1. 0.92307692 1. 0.90909091
|
|
0.92307692 0.92307692 0.92307692 0.83333333]
|
|
|
|
mean value: 0.9434731934731935
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 0.85714286 1. 1.
|
|
0.85714286 0.85714286 0.85714286 0.83333333]
|
|
|
|
mean value: 0.9261904761904762
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 0.83333333
|
|
1. 1. 1. 0.83333333]
|
|
|
|
mean value: 0.9666666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 1. 1. 0.91666667 1. 0.91666667
|
|
0.91666667 0.91666667 0.91666667 0.83333333]
|
|
|
|
mean value: 0.9416666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 1. 1. 0.85714286 1. 0.83333333
|
|
0.85714286 0.85714286 0.85714286 0.71428571]
|
|
|
|
mean value: 0.8976190476190476
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01361418 0.01535249 0.01562023 0.01599288 0.01548743 0.01546836
|
|
0.01648307 0.02116895 0.01559067 0.01623702]
|
|
|
|
mean value: 0.016101527214050292
|
|
|
|
key: score_time
|
|
value: [0.01226568 0.01216841 0.01201534 0.01191258 0.01438951 0.01357484
|
|
0.01379561 0.01208305 0.01346803 0.0135932 ]
|
|
|
|
mean value: 0.012926626205444335
|
|
|
|
key: test_mcc
|
|
value: [0.59160798 0.85714286 0.84515425 0.4472136 0.84515425 0.57735027
|
|
0.70710678 0.84515425 0.84515425 0.84515425]
|
|
|
|
mean value: 0.7406192754971532
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.76923077 0.92307692 0.91666667 0.66666667 0.91666667 0.75
|
|
0.83333333 0.91666667 0.91666667 0.91666667]
|
|
|
|
mean value: 0.8525641025641025
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.92307692 0.90909091 0.5 0.90909091 0.66666667
|
|
0.8 0.90909091 0.90909091 0.90909091]
|
|
|
|
mean value: 0.8101864801864802
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.5 0.85714286 0.83333333 0.33333333 0.83333333 0.5
|
|
0.66666667 0.83333333 0.83333333 0.83333333]
|
|
|
|
mean value: 0.7023809523809524
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.92857143 0.91666667 0.66666667 0.91666667 0.75
|
|
0.83333333 0.91666667 0.91666667 0.91666667]
|
|
|
|
mean value: 0.8511904761904763
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.85714286 0.83333333 0.33333333 0.83333333 0.5
|
|
0.66666667 0.83333333 0.83333333 0.83333333]
|
|
|
|
mean value: 0.7023809523809524
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04587793 0.04113603 0.03955197 0.03398728 0.03290081 0.03291583
|
|
0.03302121 0.03304434 0.03281808 0.03287411]
|
|
|
|
mean value: 0.035812759399414064
|
|
|
|
key: score_time
|
|
value: [0.02072811 0.02086043 0.02074909 0.01967001 0.01166368 0.02316165
|
|
0.02328491 0.02323747 0.02353668 0.02355623]
|
|
|
|
mean value: 0.021044826507568358
|
|
|
|
key: test_mcc
|
|
value: [0.73192505 0.7200823 0.70710678 0.57735027 0.70710678 0.84515425
|
|
0.84515425 0.84515425 0.70710678 0.66666667]
|
|
|
|
mean value: 0.735280739813598
|
|
|
|
key: train_mcc
|
|
value: [0.94509941 0.98181818 0.94686415 0.98198051 0.96427411 0.96427411
|
|
0.98198051 0.98198051 0.98198051 0.96427411]
|
|
|
|
mean value: 0.9694526106078004
|
|
|
|
key: test_accuracy
|
|
value: [0.84615385 0.84615385 0.83333333 0.75 0.83333333 0.91666667
|
|
0.91666667 0.91666667 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8525641025641025
|
|
|
|
key: train_accuracy
|
|
value: [0.97247706 0.99082569 0.97272727 0.99090909 0.98181818 0.98181818
|
|
0.99090909 0.99090909 0.99090909 0.98181818]
|
|
|
|
mean value: 0.984512093411176
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.875 0.85714286 0.8 0.85714286 0.90909091
|
|
0.92307692 0.92307692 0.85714286 0.83333333]
|
|
|
|
mean value: 0.8692149517149517
|
|
|
|
key: train_fscore
|
|
value: [0.97297297 0.99082569 0.97345133 0.99099099 0.98214286 0.98214286
|
|
0.99099099 0.99099099 0.99099099 0.98214286]
|
|
|
|
mean value: 0.9847642523872531
|
|
|
|
key: test_precision
|
|
value: [0.75 0.77777778 0.75 0.66666667 0.75 1.
|
|
0.85714286 0.85714286 0.75 0.83333333]
|
|
|
|
mean value: 0.7992063492063493
|
|
|
|
key: train_precision
|
|
value: [0.96428571 0.98181818 0.94827586 0.98214286 0.96491228 0.96491228
|
|
0.98214286 0.98214286 0.98214286 0.96491228]
|
|
|
|
mean value: 0.9717688028849554
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 0.83333333
|
|
1. 1. 1. 0.83333333]
|
|
|
|
mean value: 0.9666666666666667
|
|
|
|
key: train_recall
|
|
value: [0.98181818 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: /home/tanu/git/LSHTM_analysis/scripts/ml/./gid_8020.py:188: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_8020.py:191: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
0.9981818181818182
|
|
|
|
key: test_roc_auc
|
|
value: [0.85714286 0.83333333 0.83333333 0.75 0.83333333 0.91666667
|
|
0.91666667 0.91666667 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8523809523809525
|
|
|
|
key: train_roc_auc
|
|
value: [0.97239057 0.99090909 0.97272727 0.99090909 0.98181818 0.98181818
|
|
0.99090909 0.99090909 0.99090909 0.98181818]
|
|
|
|
mean value: 0.9845117845117844
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.77777778 0.75 0.66666667 0.75 0.83333333
|
|
0.85714286 0.85714286 0.75 0.71428571]
|
|
|
|
mean value: 0.7706349206349207
|
|
|
|
key: train_jcc
|
|
value: [0.94736842 0.98181818 0.94827586 0.98214286 0.96491228 0.96491228
|
|
0.98214286 0.98214286 0.98214286 0.96491228]
|
|
|
|
mean value: 0.9700770735616471
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.17134643 0.19228506 0.19199109 0.19097257 0.19410801 0.19272614
|
|
0.19422579 0.1924634 0.2073741 0.29488325]
|
|
|
|
mean value: 0.20223758220672608
|
|
|
|
key: score_time
|
|
value: [0.02177691 0.02197528 0.02310205 0.02106833 0.01600623 0.02016163
|
|
0.02284908 0.01529455 0.02007461 0.02356124]
|
|
|
|
mean value: 0.02058699131011963
|
|
|
|
key: test_mcc
|
|
value: [0.21957752 0.7200823 0.70710678 0.57735027 0.70710678 0.84515425
|
|
0.84515425 0.84515425 0.70710678 0.66666667]
|
|
|
|
mean value: 0.6840459859838001
|
|
|
|
key: train_mcc
|
|
value: [0.85372474 0.98181818 0.94686415 0.98198051 0.96427411 0.96427411
|
|
0.98198051 0.98198051 0.98198051 0.96427411]
|
|
|
|
mean value: 0.9603151429467506
|
|
|
|
key: test_accuracy
|
|
value: [0.61538462 0.84615385 0.83333333 0.75 0.83333333 0.91666667
|
|
0.91666667 0.91666667 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8294871794871795
|
|
|
|
key: train_accuracy
|
|
value: [0.9266055 0.99082569 0.97272727 0.99090909 0.98181818 0.98181818
|
|
0.99090909 0.99090909 0.99090909 0.98181818]
|
|
|
|
mean value: 0.9799249374478732
|
|
|
|
key: test_fscore
|
|
value: [0.54545455 0.875 0.85714286 0.8 0.85714286 0.90909091
|
|
0.92307692 0.92307692 0.85714286 0.83333333]
|
|
|
|
mean value: 0.8380461205461205
|
|
|
|
key: train_fscore
|
|
value: [0.92857143 0.99082569 0.97345133 0.99099099 0.98214286 0.98214286
|
|
0.99099099 0.99099099 0.99099099 0.98214286]
|
|
|
|
mean value: 0.9803240979470986
|
|
|
|
key: test_precision
|
|
value: [0.6 0.77777778 0.75 0.66666667 0.75 1.
|
|
0.85714286 0.85714286 0.75 0.83333333]
|
|
|
|
mean value: 0.7842063492063492
|
|
|
|
key: train_precision
|
|
value: [0.9122807 0.98181818 0.94827586 0.98214286 0.96491228 0.96491228
|
|
0.98214286 0.98214286 0.98214286 0.96491228]
|
|
|
|
mean value: 0.9665683016318225
|
|
|
|
key: test_recall
|
|
value: [0.5 1. 1. 1. 1. 0.83333333
|
|
1. 1. 1. 0.83333333]
|
|
|
|
mean value: 0.9166666666666666
|
|
|
|
key: train_recall
|
|
value: [0.94545455 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9945454545454545
|
|
|
|
key: test_roc_auc
|
|
value: [0.60714286 0.83333333 0.83333333 0.75 0.83333333 0.91666667
|
|
0.91666667 0.91666667 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8273809523809524
|
|
|
|
key: train_roc_auc
|
|
value: [0.92643098 0.99090909 0.97272727 0.99090909 0.98181818 0.98181818
|
|
0.99090909 0.99090909 0.99090909 0.98181818]
|
|
|
|
mean value: 0.9799158249158249
|
|
|
|
key: test_jcc
|
|
value: [0.375 0.77777778 0.75 0.66666667 0.75 0.83333333
|
|
0.85714286 0.85714286 0.75 0.71428571]
|
|
|
|
mean value: 0.7331349206349206
|
|
|
|
key: train_jcc
|
|
value: [0.86666667 0.98181818 0.94827586 0.98214286 0.96491228 0.96491228
|
|
0.98214286 0.98214286 0.98214286 0.96491228]
|
|
|
|
mean value: 0.9620068981230505
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.75
|